diff --git a/cmd/kaf/produce.go b/cmd/kaf/produce.go
index a409ce37..557aed0a 100644
--- a/cmd/kaf/produce.go
+++ b/cmd/kaf/produce.go
@@ -6,12 +6,14 @@ import (
"os"
"github.com/Shopify/sarama"
+ "github.com/burdiyan/kafkautil"
pb "github.com/golang/protobuf/proto"
"github.com/spf13/cobra"
)
var keyFlag string
var numFlag int
+var partitionerFlag string
func init() {
rootCmd.AddCommand(produceCmd)
@@ -22,6 +24,7 @@ func init() {
produceCmd.Flags().StringSliceVar(&protoFiles, "proto-include", []string{}, "Path to proto files")
produceCmd.Flags().StringSliceVar(&protoExclude, "proto-exclude", []string{}, "Proto exclusions (path prefixes)")
produceCmd.Flags().StringVar(&protoType, "proto-type", "", "Fully qualified name of the proto message type. Example: com.test.SampleMessage")
+ produceCmd.Flags().StringVar(&partitionerFlag, "partitioner", "", "Select partitioner: default or jvm")
}
@@ -31,7 +34,11 @@ var produceCmd = &cobra.Command{
Args: cobra.ExactArgs(1),
PreRun: setupProtoDescriptorRegistry,
Run: func(cmd *cobra.Command, args []string) {
- producer, err := sarama.NewSyncProducer(currentCluster.Brokers, getConfig())
+ cfg := getConfig()
+ if partitionerFlag == "jvm" {
+ cfg.Producer.Partitioner = kafkautil.NewJVMCompatiblePartitioner
+ }
+ producer, err := sarama.NewSyncProducer(currentCluster.Brokers, cfg)
if err != nil {
errorExit("Unable to create new sync producer: %v\n", err)
}
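
For context on what the new `--partitioner` flag switches on, here is a minimal, illustrative sketch (not part of this patch) of wiring `kafkautil.NewJVMCompatiblePartitioner` into a sarama producer config, which is what the hunk above does when the flag is set. The broker address, topic, and key are placeholders; the point is that keys get hashed the way the Java client's default partitioner hashes them (murmur2, as I understand kafkautil's implementation), so Go and JVM producers route equal keys to the same partition.

```go
// Illustrative sketch only: how the JVM-compatible partitioner plugs into sarama.
// Broker address, topic name, and key below are placeholders.
package main

import (
	"log"

	"github.com/Shopify/sarama"
	"github.com/burdiyan/kafkautil"
)

func main() {
	cfg := sarama.NewConfig()
	cfg.Producer.Return.Successes = true // required by SyncProducer

	// Hash keys with murmur2 like the Java client's default partitioner,
	// instead of sarama's default FNV-based hash partitioner.
	cfg.Producer.Partitioner = kafkautil.NewJVMCompatiblePartitioner

	producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, cfg)
	if err != nil {
		log.Fatalf("unable to create sync producer: %v", err)
	}
	defer producer.Close()

	partition, offset, err := producer.SendMessage(&sarama.ProducerMessage{
		Topic: "example-topic",
		Key:   sarama.StringEncoder("some-key"),
		Value: sarama.StringEncoder("hello"),
	})
	if err != nil {
		log.Fatalf("send failed: %v", err)
	}
	log.Printf("delivered to partition %d at offset %d", partition, offset)
}
```

With the patch applied, running the produce command with `--partitioner jvm` should place keyed messages on the same partitions a Java producer would choose for those keys.
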
diff --git a/go.mod b/go.mod
index e78cf842..93158ee5 100644
--- a/go.mod
+++ b/go.mod
@@ -8,6 +8,8 @@ require (
github.com/Shopify/sarama v1.23.0
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
github.com/avast/retry-go v2.4.1+incompatible // indirect
+ github.com/bsm/sarama-cluster v2.1.15+incompatible // indirect
+ github.com/burdiyan/kafkautil v0.0.0-20190131162249-eaf83ed22d5b
github.com/eapache/go-resiliency v1.2.0 // indirect
github.com/fatih/color v1.7.0 // indirect
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect
@@ -16,14 +18,19 @@ require (
github.com/jcmturner/gofork v1.0.0 // indirect
github.com/jhump/protoreflect v1.5.0
github.com/linkedin/goavro v2.1.0+incompatible
+ github.com/lovoo/goka v0.1.4 // indirect
github.com/magiconair/properties v1.8.1
github.com/manifoldco/promptui v0.3.2
github.com/mattn/go-colorable v0.1.2
github.com/mitchellh/go-homedir v1.1.0
github.com/nicksnyder/go-i18n v1.10.1 // indirect
github.com/pierrec/lz4 v2.0.5+incompatible // indirect
+ github.com/pkg/errors v0.9.1 // indirect
github.com/rcrowley/go-metrics v0.0.0-20190706150252-9beb055b7962 // indirect
+ github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da // indirect
github.com/spf13/cobra v0.0.5
+ github.com/syndtr/goleveldb v1.0.0 // indirect
+ github.com/wvanbergen/kazoo-go v0.0.0-20180202103751-f72d8611297a // indirect
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c
golang.org/x/net v0.0.0-20190628185345-da137c7871d7 // indirect
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 // indirect
diff --git a/go.sum b/go.sum
index c11f63fc..f8af688b 100644
--- a/go.sum
+++ b/go.sum
@@ -17,6 +17,11 @@ github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRF
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/avast/retry-go v2.4.1+incompatible h1:WMHc0mwoz20UVmBYK89mUB/KFRlxO0p+s+sgpmJMviY=
github.com/avast/retry-go v2.4.1+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
+github.com/bsm/sarama-cluster v1.0.3 h1:EYUVzny7Ptwfci5V3zeKhhX0CruB5rkkI71lz70CXRM=
+github.com/bsm/sarama-cluster v2.1.15+incompatible h1:RkV6WiNRnqEEbp81druK8zYhmnIgdOjqSVi0+9Cnl2A=
+github.com/bsm/sarama-cluster v2.1.15+incompatible/go.mod h1:r7ao+4tTNXvWm+VRpRJchr2kQhqxgmAp2iEX5W96gMM=
+github.com/burdiyan/kafkautil v0.0.0-20190131162249-eaf83ed22d5b h1:gRFujk0F/KYFDEalhpaAbLIwmeiDH53ZgdllJ7UHxyQ=
+github.com/burdiyan/kafkautil v0.0.0-20190131162249-eaf83ed22d5b/go.mod h1:5hrpM9I1h0fZlTk8JhqaaBaCs76EbCGvFcPtm5SxcCU=
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
@@ -47,8 +52,10 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekf
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/lint v0.0.0-20181026193005-c67002cb31c3 h1:I4BOK3PBMjhWfQM2zPJKK7lOBGsrsvOB7kBELP33hiE=
github.com/golang/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/shlex v0.0.0-20181106134648-c34317bd91bf h1:7+FW5aGwISbqUtkfmIpZJGRgNFg2ioYPvFaUxdqpDsg=
@@ -62,6 +69,7 @@ github.com/hokaccha/go-prettyjson v0.0.0-20180920040306-f579f869bbfe h1:MCgzztuo
github.com/hokaccha/go-prettyjson v0.0.0-20180920040306-f579f869bbfe/go.mod h1:pFlLw2CfqZiIBOx6BuCeRLCrfxBJipTY0nIOF/VbGcI=
github.com/hokaccha/go-prettyjson v0.0.0-20190818114111-108c894c2c0e h1:0aewS5NTyxftZHSnFaJmWE5oCCrj4DyEXkAiMa1iZJM=
github.com/hokaccha/go-prettyjson v0.0.0-20190818114111-108c894c2c0e/go.mod h1:pFlLw2CfqZiIBOx6BuCeRLCrfxBJipTY0nIOF/VbGcI=
+github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jcmturner/gofork v0.0.0-20190328161633-dc7c13fece03 h1:FUwcHNlEqkqLjLBdCp5PRlCFijNjvcYANOZXzCfXwCM=
@@ -79,6 +87,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/linkedin/goavro v2.1.0+incompatible h1:DV2aUlj2xZiuxQyvag8Dy7zjY69ENjS66bWkSfdpddY=
github.com/linkedin/goavro v2.1.0+incompatible/go.mod h1:bBCwI2eGYpUI/4820s67MElg9tdeLbINjLjiM2xZFYM=
+github.com/lovoo/goka v0.1.4 h1:OXca3VFs4XNYbAKabN0KghQVHcTPqY/K2TdCfiIIt98=
+github.com/lovoo/goka v0.1.4/go.mod h1:jycJV5w5O/zr22OJpE34lPNymbvDhiaoJ41shxKD8cQ=
github.com/lunixbochs/vtclean v0.0.0-20180621232353-2d01aacdc34a h1:weJVJJRzAJBFRlAiJQROKQs8oC9vOxvm4rZmBBk0ONw=
github.com/lunixbochs/vtclean v0.0.0-20180621232353-2d01aacdc34a/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI=
github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY=
@@ -100,12 +110,17 @@ github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/nicksnyder/go-i18n v1.10.1 h1:isfg77E/aCD7+0lD/D00ebR2MV5vgeQ276WYyDaCRQc=
github.com/nicksnyder/go-i18n v1.10.1/go.mod h1:e4Di5xjP9oTVrC6y3C7C0HoSYXjSbhh/dU0eUV32nB4=
+github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
+github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
+github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pierrec/lz4 v0.0.0-20190327172049-315a67e90e41 h1:GeinFsrjWz97fAxVUEd748aV0cYL+I6k44gFJTCVvpU=
github.com/pierrec/lz4 v0.0.0-20190327172049-315a67e90e41/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc=
github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I=
github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -114,6 +129,8 @@ github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqn
github.com/rcrowley/go-metrics v0.0.0-20190706150252-9beb055b7962 h1:eUm8ma4+yPknhXtkYlWh3tMkE6gBjXZToDned9s2gbQ=
github.com/rcrowley/go-metrics v0.0.0-20190706150252-9beb055b7962/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
+github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da h1:p3Vo3i64TCLY7gIfzeQaUJ+kppEO5WQG3cL8iE8tGHU=
+github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
@@ -126,9 +143,13 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
+github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
github.com/tsenart/deadcode v0.0.0-20160724212837-210d2dc333e9 h1:vY5WqiEon0ZSTGM3ayVVi+twaHKHDFUVloaQ/wug9/c=
github.com/tsenart/deadcode v0.0.0-20160724212837-210d2dc333e9/go.mod h1:q+QjxYvZ+fpjMXqs+XEriussHjSYqeXVnAdSV1tkMYk=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
+github.com/wvanbergen/kazoo-go v0.0.0-20180202103751-f72d8611297a h1:ILoU84rj4AQ3q6cjQvtb9jBjx4xzR/Riq/zYhmDQiOk=
+github.com/wvanbergen/kazoo-go v0.0.0-20180202103751-f72d8611297a/go.mod h1:vQQATAGxVK20DC1rRubTJbZDDhhpA4QfU02pMdPxGO4=
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c h1:u40Z8hqBAAQyv+vATcGgV0YCnDjqSL7/q/JyPhhJSPk=
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I=
github.com/xdg/stringprep v1.0.0 h1:d9X0esnoa3dFsV0FG35rAT0RIhYFlPq7MiP+DW89La0=
@@ -141,12 +162,16 @@ golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5/go.mod h1:WFFai1msRO1wXaE
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3 h1:x/bBzNauLQAlE3fLku/xy92Y8QwKX5HZymrMz2IiKFc=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/net v0.0.0-20180530234432-1e491301e022/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU=
golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -171,6 +196,7 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/jcmturner/aescts.v1 v1.0.1 h1:cVVZBK2b1zY26haWB4vbBiZrfFQnfbTVrE3xZq6hrEw=
gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo=
gopkg.in/jcmturner/dnsutils.v1 v1.0.1 h1:cIuC1OLRGZrld+16ZJvvZxVJeKPsvd5eUIvxfoN5hSM=
@@ -185,6 +211,7 @@ gopkg.in/jcmturner/rpc.v1 v1.1.0 h1:QHIUxTX1ISuAv9dD2wJ9HWQVuWDX/Zc0PfeC2tjc4rU=
gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8=
gopkg.in/linkedin/goavro.v1 v1.0.5 h1:BJa69CDh0awSsLUmZ9+BowBdokpduDZSM9Zk8oKHfN4=
gopkg.in/linkedin/goavro.v1 v1.0.5/go.mod h1:Aw5GdAbizjOEl0kAMHV9iHmA8reZzW/OKuJAl4Hb9F0=
+gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/vendor/github.com/avast/retry-go/.gitignore b/vendor/github.com/avast/retry-go/.gitignore
new file mode 100644
index 00000000..c40eb23f
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/.gitignore
@@ -0,0 +1,21 @@
+# Binaries for programs and plugins
+*.exe
+*.dll
+*.so
+*.dylib
+
+# Test binary, build with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
+.glide/
+
+# dep
+vendor/
+Gopkg.lock
+
+# cover
+coverage.txt
diff --git a/vendor/github.com/avast/retry-go/.godocdown.tmpl b/vendor/github.com/avast/retry-go/.godocdown.tmpl
new file mode 100644
index 00000000..6873edf8
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/.godocdown.tmpl
@@ -0,0 +1,37 @@
+# {{ .Name }}
+
+[![Release](https://img.shields.io/github/release/avast/retry-go.svg?style=flat-square)](https://github.com/avast/retry-go/releases/latest)
+[![Software License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square)](LICENSE.md)
+[![Travis](https://img.shields.io/travis/avast/retry-go.svg?style=flat-square)](https://travis-ci.org/avast/retry-go)
+[![AppVeyor](https://ci.appveyor.com/api/projects/status/fieg9gon3qlq0a9a?svg=true)](https://ci.appveyor.com/project/JaSei/retry-go)
+[![Go Report Card](https://goreportcard.com/badge/github.com/avast/retry-go?style=flat-square)](https://goreportcard.com/report/github.com/avast/retry-go)
+[![GoDoc](https://godoc.org/github.com/avast/retry-go?status.svg&style=flat-square)](http://godoc.org/github.com/avast/retry-go)
+[![codecov.io](https://codecov.io/github/avast/retry-go/coverage.svg?branch=master)](https://codecov.io/github/avast/retry-go?branch=master)
+[![Sourcegraph](https://sourcegraph.com/github.com/avast/retry-go/-/badge.svg)](https://sourcegraph.com/github.com/avast/retry-go?badge)
+
+{{ .EmitSynopsis }}
+
+{{ .EmitUsage }}
+
+## Contributing
+
+Contributions are very much welcome.
+
+### Makefile
+
+Makefile provides several handy rules, like README.md `generator` , `setup` for prepare build/dev environment, `test`, `cover`, etc...
+
+Try `make help` for more information.
+
+### Before pull request
+
+please try:
+* run tests (`make test`)
+* run linter (`make lint`)
+* if your IDE don't automaticaly do `go fmt`, run `go fmt` (`make fmt`)
+
+### README
+
+README.md are generate from template [.godocdown.tmpl](.godocdown.tmpl) and code documentation via [godocdown](https://github.com/robertkrimen/godocdown).
+
+Never edit README.md direct, because your change will be lost.
diff --git a/vendor/github.com/avast/retry-go/.travis.yml b/vendor/github.com/avast/retry-go/.travis.yml
new file mode 100644
index 00000000..a0c14a0e
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/.travis.yml
@@ -0,0 +1,19 @@
+language: go
+
+go:
+ - 1.6
+ - 1.7
+ - 1.8
+ - 1.9
+ - "1.10"
+ - 1.11
+ - 1.12
+
+install:
+ - make setup
+
+script:
+ - make ci
+
+after_success:
+ - bash <(curl -s https://codecov.io/bash)
diff --git a/vendor/github.com/avast/retry-go/Gopkg.toml b/vendor/github.com/avast/retry-go/Gopkg.toml
new file mode 100644
index 00000000..cf8c9eb0
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/Gopkg.toml
@@ -0,0 +1,3 @@
+[[constraint]]
+ name = "github.com/stretchr/testify"
+ version = "1.1.4"
diff --git a/vendor/github.com/avast/retry-go/LICENSE b/vendor/github.com/avast/retry-go/LICENSE
new file mode 100644
index 00000000..f63fca81
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Avast
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/avast/retry-go/Makefile b/vendor/github.com/avast/retry-go/Makefile
new file mode 100644
index 00000000..769816d2
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/Makefile
@@ -0,0 +1,65 @@
+SOURCE_FILES?=$$(go list ./... | grep -v /vendor/)
+TEST_PATTERN?=.
+TEST_OPTIONS?=
+DEP?=$$(which dep)
+VERSION?=$$(cat VERSION)
+LINTER?=$$(which golangci-lint)
+LINTER_VERSION=1.15.0
+
+ifeq ($(OS),Windows_NT)
+ DEP_VERS=dep-windows-amd64
+ LINTER_FILE=golangci-lint-$(LINTER_VERSION)-windows-amd64.zip
+ LINTER_UNPACK= >| app.zip; unzip -j app.zip -d $$GOPATH/bin; rm app.zip
+else ifeq ($(OS), Darwin)
+ LINTER_FILE=golangci-lint-$(LINTER_VERSION)-darwin-amd64.tar.gz
+ LINTER_UNPACK= | tar xzf - -C $$GOPATH/bin --wildcards --strip 1 "**/golangci-lint"
+else
+ DEP_VERS=dep-linux-amd64
+ LINTER_FILE=golangci-lint-$(LINTER_VERSION)-linux-amd64.tar.gz
+ LINTER_UNPACK= | tar xzf - -C $$GOPATH/bin --wildcards --strip 1 "**/golangci-lint"
+endif
+
+setup:
+ go get -u github.com/pierrre/gotestcover
+ go get -u golang.org/x/tools/cmd/cover
+ go get -u github.com/robertkrimen/godocdown/godocdown
+ @if [ "$(LINTER)" = "" ]; then\
+ curl -L https://github.com/golangci/golangci-lint/releases/download/v$(LINTER_VERSION)/$(LINTER_FILE) $(LINTER_UNPACK) ;\
+ chmod +x $$GOPATH/bin/golangci-lint;\
+ fi
+ @if [ "$(DEP)" = "" ]; then\
+ curl -L https://github.com/golang/dep/releases/download/v0.3.1/$(DEP_VERS) >| $$GOPATH/bin/dep;\
+ chmod +x $$GOPATH/bin/dep;\
+ fi
+ dep ensure
+
+generate: ## Generate README.md
+ godocdown >| README.md
+
+test: generate test_and_cover_report lint
+
+test_and_cover_report:
+ gotestcover $(TEST_OPTIONS) -covermode=atomic -coverprofile=coverage.txt $(SOURCE_FILES) -run $(TEST_PATTERN) -timeout=2m
+
+cover: test ## Run all the tests and opens the coverage report
+ go tool cover -html=coverage.txt
+
+fmt: ## gofmt and goimports all go files
+ find . -name '*.go' -not -wholename './vendor/*' | while read -r file; do gofmt -w -s "$$file"; goimports -w "$$file"; done
+
+lint: ## Run all the linters
+ golangci-lint run
+
+ci: test_and_cover_report ## Run all the tests but no linters - use https://golangci.com integration instead
+
+build:
+ go build
+
+release: ## Release new version
+ git tag | grep -q $(VERSION) && echo This version was released! Increase VERSION! || git tag $(VERSION) && git push origin $(VERSION) && git tag v$(VERSION) && git push origin v$(VERSION)
+
+# Absolutely awesome: http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html
+help:
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+.DEFAULT_GOAL := build
diff --git a/vendor/github.com/avast/retry-go/README.md b/vendor/github.com/avast/retry-go/README.md
new file mode 100644
index 00000000..b282110e
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/README.md
@@ -0,0 +1,291 @@
+# retry
+
+[![Release](https://img.shields.io/github/release/avast/retry-go.svg?style=flat-square)](https://github.com/avast/retry-go/releases/latest)
+[![Software License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square)](LICENSE.md)
+[![Travis](https://img.shields.io/travis/avast/retry-go.svg?style=flat-square)](https://travis-ci.org/avast/retry-go)
+[![AppVeyor](https://ci.appveyor.com/api/projects/status/fieg9gon3qlq0a9a?svg=true)](https://ci.appveyor.com/project/JaSei/retry-go)
+[![Go Report Card](https://goreportcard.com/badge/github.com/avast/retry-go?style=flat-square)](https://goreportcard.com/report/github.com/avast/retry-go)
+[![GoDoc](https://godoc.org/github.com/avast/retry-go?status.svg&style=flat-square)](http://godoc.org/github.com/avast/retry-go)
+[![codecov.io](https://codecov.io/github/avast/retry-go/coverage.svg?branch=master)](https://codecov.io/github/avast/retry-go?branch=master)
+[![Sourcegraph](https://sourcegraph.com/github.com/avast/retry-go/-/badge.svg)](https://sourcegraph.com/github.com/avast/retry-go?badge)
+
+Simple library for retry mechanism
+
+slightly inspired by
+[Try::Tiny::Retry](https://metacpan.org/pod/Try::Tiny::Retry)
+
+
+### SYNOPSIS
+
+http get with retry:
+
+ url := "http://example.com"
+ var body []byte
+
+ err := retry.Do(
+ func() error {
+ resp, err := http.Get(url)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+ body, err = ioutil.ReadAll(resp.Body)
+ if err != nil {
+ return err
+ }
+
+ return nil
+ },
+ )
+
+ fmt.Println(body)
+
+[next examples](https://github.com/avast/retry-go/tree/master/examples)
+
+
+### SEE ALSO
+
+* [giantswarm/retry-go](https://github.com/giantswarm/retry-go) - slightly
+complicated interface.
+
+* [sethgrid/pester](https://github.com/sethgrid/pester) - only http retry for
+http calls with retries and backoff
+
+* [cenkalti/backoff](https://github.com/cenkalti/backoff) - Go port of the
+exponential backoff algorithm from Google's HTTP Client Library for Java. Really
+complicated interface.
+
+* [rafaeljesus/retry-go](https://github.com/rafaeljesus/retry-go) - looks good,
+slightly similar as this package, don't have 'simple' `Retry` method
+
+* [matryer/try](https://github.com/matryer/try) - very popular package,
+nonintuitive interface (for me)
+
+
+### BREAKING CHANGES
+
+1.0.2 -> 2.0.0
+
+* argument of `retry.Delay` is final delay (no multiplication by `retry.Units`
+anymore)
+
+* function `retry.Units` are removed
+
+* [more about this breaking change](https://github.com/avast/retry-go/issues/7)
+
+0.3.0 -> 1.0.0
+
+* `retry.Retry` function are changed to `retry.Do` function
+
+* `retry.RetryCustom` (OnRetry) and `retry.RetryCustomWithOpts` functions are
+now implement via functions produces Options (aka `retry.OnRetry`)
+
+## Usage
+
+#### func BackOffDelay
+
+```go
+func BackOffDelay(n uint, config *Config) time.Duration
+```
+BackOffDelay is a DelayType which increases delay between consecutive retries
+
+#### func Do
+
+```go
+func Do(retryableFunc RetryableFunc, opts ...Option) error
+```
+
+#### func FixedDelay
+
+```go
+func FixedDelay(_ uint, config *Config) time.Duration
+```
+FixedDelay is a DelayType which keeps delay the same through all iterations
+
+#### func IsRecoverable
+
+```go
+func IsRecoverable(err error) bool
+```
+IsRecoverable checks if error is an instance of `unrecoverableError`
+
+#### func Unrecoverable
+
+```go
+func Unrecoverable(err error) unrecoverableError
+```
+Unrecoverable wraps an error in `unrecoverableError` struct
+
+#### type Config
+
+```go
+type Config struct {
+}
+```
+
+
+#### type DelayTypeFunc
+
+```go
+type DelayTypeFunc func(n uint, config *Config) time.Duration
+```
+
+
+#### type Error
+
+```go
+type Error []error
+```
+
+Error type represents list of errors in retry
+
+#### func (Error) Error
+
+```go
+func (e Error) Error() string
+```
+Error method return string representation of Error It is an implementation of
+error interface
+
+#### func (Error) WrappedErrors
+
+```go
+func (e Error) WrappedErrors() []error
+```
+WrappedErrors returns the list of errors that this Error is wrapping. It is an
+implementation of the `errwrap.Wrapper` interface in package
+[errwrap](https://github.com/hashicorp/errwrap) so that `retry.Error` can be
+used with that library.
+
+#### type OnRetryFunc
+
+```go
+type OnRetryFunc func(n uint, err error)
+```
+
+Function signature of OnRetry function n = count of attempts
+
+#### type Option
+
+```go
+type Option func(*Config)
+```
+
+Option represents an option for retry.
+
+#### func Attempts
+
+```go
+func Attempts(attempts uint) Option
+```
+Attempts set count of retry default is 10
+
+#### func Delay
+
+```go
+func Delay(delay time.Duration) Option
+```
+Delay set delay between retry default is 100ms
+
+#### func DelayType
+
+```go
+func DelayType(delayType DelayTypeFunc) Option
+```
+DelayType set type of the delay between retries default is BackOff
+
+#### func LastErrorOnly
+
+```go
+func LastErrorOnly(lastErrorOnly bool) Option
+```
+return the direct last error that came from the retried function default is
+false (return wrapped errors with everything)
+
+#### func OnRetry
+
+```go
+func OnRetry(onRetry OnRetryFunc) Option
+```
+OnRetry function callback are called each retry
+
+log each retry example:
+
+ retry.Do(
+ func() error {
+ return errors.New("some error")
+ },
+ retry.OnRetry(func(n uint, err error) {
+ log.Printf("#%d: %s\n", n, err)
+ }),
+ )
+
+#### func RetryIf
+
+```go
+func RetryIf(retryIf RetryIfFunc) Option
+```
+RetryIf controls whether a retry should be attempted after an error (assuming
+there are any retry attempts remaining)
+
+skip retry if special error example:
+
+ retry.Do(
+ func() error {
+ return errors.New("special error")
+ },
+ retry.RetryIf(func(err error) bool {
+ if err.Error() == "special error" {
+ return false
+ }
+ return true
+ })
+ )
+
+The default RetryIf stops execution if the error is wrapped using
+`retry.Unrecoverable`, so above example may also be shortened to:
+
+ retry.Do(
+ func() error {
+ return retry.Unrecoverable(errors.New("special error"))
+ }
+ )
+
+#### type RetryIfFunc
+
+```go
+type RetryIfFunc func(error) bool
+```
+
+Function signature of retry if function
+
+#### type RetryableFunc
+
+```go
+type RetryableFunc func() error
+```
+
+Function signature of retryable function
+
+## Contributing
+
+Contributions are very much welcome.
+
+### Makefile
+
+Makefile provides several handy rules, like README.md `generator` , `setup` for prepare build/dev environment, `test`, `cover`, etc...
+
+Try `make help` for more information.
+
+### Before pull request
+
+please try:
+* run tests (`make test`)
+* run linter (`make lint`)
+* if your IDE don't automaticaly do `go fmt`, run `go fmt` (`make fmt`)
+
+### README
+
+README.md are generate from template [.godocdown.tmpl](.godocdown.tmpl) and code documentation via [godocdown](https://github.com/robertkrimen/godocdown).
+
+Never edit README.md direct, because your change will be lost.
diff --git a/vendor/github.com/avast/retry-go/VERSION b/vendor/github.com/avast/retry-go/VERSION
new file mode 100644
index 00000000..005119ba
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/VERSION
@@ -0,0 +1 @@
+2.4.1
diff --git a/vendor/github.com/avast/retry-go/appveyor.yml b/vendor/github.com/avast/retry-go/appveyor.yml
new file mode 100644
index 00000000..dc5234ac
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/appveyor.yml
@@ -0,0 +1,19 @@
+version: "{build}"
+
+clone_folder: c:\Users\appveyor\go\src\github.com\avast\retry-go
+
+#os: Windows Server 2012 R2
+platform: x64
+
+install:
+ - copy c:\MinGW\bin\mingw32-make.exe c:\MinGW\bin\make.exe
+ - set GOPATH=C:\Users\appveyor\go
+ - set PATH=%PATH%;c:\MinGW\bin
+ - set PATH=%PATH%;%GOPATH%\bin;c:\go\bin
+ - set GOBIN=%GOPATH%\bin
+ - go version
+ - go env
+ - make setup
+
+build_script:
+ - make ci
diff --git a/vendor/github.com/avast/retry-go/options.go b/vendor/github.com/avast/retry-go/options.go
new file mode 100644
index 00000000..db20f5c3
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/options.go
@@ -0,0 +1,117 @@
+package retry
+
+import (
+ "time"
+)
+
+// Function signature of retry if function
+type RetryIfFunc func(error) bool
+
+// Function signature of OnRetry function
+// n = count of attempts
+type OnRetryFunc func(n uint, err error)
+
+type DelayTypeFunc func(n uint, config *Config) time.Duration
+
+type Config struct {
+ attempts uint
+ delay time.Duration
+ onRetry OnRetryFunc
+ retryIf RetryIfFunc
+ delayType DelayTypeFunc
+ lastErrorOnly bool
+}
+
+// Option represents an option for retry.
+type Option func(*Config)
+
+// return the direct last error that came from the retried function
+// default is false (return wrapped errors with everything)
+func LastErrorOnly(lastErrorOnly bool) Option {
+ return func(c *Config) {
+ c.lastErrorOnly = lastErrorOnly
+ }
+}
+
+// Attempts set count of retry
+// default is 10
+func Attempts(attempts uint) Option {
+ return func(c *Config) {
+ c.attempts = attempts
+ }
+}
+
+// Delay set delay between retry
+// default is 100ms
+func Delay(delay time.Duration) Option {
+ return func(c *Config) {
+ c.delay = delay
+ }
+}
+
+// DelayType set type of the delay between retries
+// default is BackOff
+func DelayType(delayType DelayTypeFunc) Option {
+ return func(c *Config) {
+ c.delayType = delayType
+ }
+}
+
+// BackOffDelay is a DelayType which increases delay between consecutive retries
+func BackOffDelay(n uint, config *Config) time.Duration {
+ return config.delay * (1 << n)
+}
+
+// FixedDelay is a DelayType which keeps delay the same through all iterations
+func FixedDelay(_ uint, config *Config) time.Duration {
+ return config.delay
+}
+
+// OnRetry function callback are called each retry
+//
+// log each retry example:
+//
+// retry.Do(
+// func() error {
+// return errors.New("some error")
+// },
+// retry.OnRetry(func(n uint, err error) {
+// log.Printf("#%d: %s\n", n, err)
+// }),
+// )
+func OnRetry(onRetry OnRetryFunc) Option {
+ return func(c *Config) {
+ c.onRetry = onRetry
+ }
+}
+
+// RetryIf controls whether a retry should be attempted after an error
+// (assuming there are any retry attempts remaining)
+//
+// skip retry if special error example:
+//
+// retry.Do(
+// func() error {
+// return errors.New("special error")
+// },
+// retry.RetryIf(func(err error) bool {
+// if err.Error() == "special error" {
+// return false
+// }
+// return true
+// })
+// )
+//
+// By default RetryIf stops execution if the error is wrapped using `retry.Unrecoverable`,
+// so above example may also be shortened to:
+//
+// retry.Do(
+// func() error {
+// return retry.Unrecoverable(errors.New("special error"))
+// }
+// )
+func RetryIf(retryIf RetryIfFunc) Option {
+ return func(c *Config) {
+ c.retryIf = retryIf
+ }
+}
diff --git a/vendor/github.com/avast/retry-go/retry.go b/vendor/github.com/avast/retry-go/retry.go
new file mode 100644
index 00000000..098c6dde
--- /dev/null
+++ b/vendor/github.com/avast/retry-go/retry.go
@@ -0,0 +1,182 @@
+/*
+Simple library for retry mechanism
+
+slightly inspired by [Try::Tiny::Retry](https://metacpan.org/pod/Try::Tiny::Retry)
+
+SYNOPSIS
+
+http get with retry:
+
+ url := "http://example.com"
+ var body []byte
+
+ err := retry.Do(
+ func() error {
+ resp, err := http.Get(url)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+ body, err = ioutil.ReadAll(resp.Body)
+ if err != nil {
+ return err
+ }
+
+ return nil
+ },
+ )
+
+ fmt.Println(body)
+
+[next examples](https://github.com/avast/retry-go/tree/master/examples)
+
+
+SEE ALSO
+
+* [giantswarm/retry-go](https://github.com/giantswarm/retry-go) - slightly complicated interface.
+
+* [sethgrid/pester](https://github.com/sethgrid/pester) - only http retry for http calls with retries and backoff
+
+* [cenkalti/backoff](https://github.com/cenkalti/backoff) - Go port of the exponential backoff algorithm from Google's HTTP Client Library for Java. Really complicated interface.
+
+* [rafaeljesus/retry-go](https://github.com/rafaeljesus/retry-go) - looks good, slightly similar as this package, don't have 'simple' `Retry` method
+
+* [matryer/try](https://github.com/matryer/try) - very popular package, nonintuitive interface (for me)
+
+BREAKING CHANGES
+
+1.0.2 -> 2.0.0
+
+* argument of `retry.Delay` is final delay (no multiplication by `retry.Units` anymore)
+
+* function `retry.Units` are removed
+
+* [more about this breaking change](https://github.com/avast/retry-go/issues/7)
+
+
+0.3.0 -> 1.0.0
+
+* `retry.Retry` function are changed to `retry.Do` function
+
+* `retry.RetryCustom` (OnRetry) and `retry.RetryCustomWithOpts` functions are now implement via functions produces Options (aka `retry.OnRetry`)
+
+
+*/
+package retry
+
+import (
+ "fmt"
+ "strings"
+ "time"
+)
+
+// Function signature of retryable function
+type RetryableFunc func() error
+
+func Do(retryableFunc RetryableFunc, opts ...Option) error {
+ var n uint
+
+ //default
+ config := &Config{
+ attempts: 10,
+ delay: 100 * time.Millisecond,
+ onRetry: func(n uint, err error) {},
+ retryIf: IsRecoverable,
+ delayType: BackOffDelay,
+ lastErrorOnly: false,
+ }
+
+ //apply opts
+ for _, opt := range opts {
+ opt(config)
+ }
+
+ errorLog := make(Error, config.attempts)
+
+ for n < config.attempts {
+ err := retryableFunc()
+
+ if err != nil {
+ config.onRetry(n, err)
+ errorLog[n] = unpackUnrecoverable(err)
+
+ if !config.retryIf(err) {
+ break
+ }
+
+ // if this is last attempt - don't wait
+ if n == config.attempts-1 {
+ break
+ }
+
+ delayTime := config.delayType(n, config)
+ time.Sleep(delayTime)
+ } else {
+ return nil
+ }
+
+ n++
+ }
+
+ if config.lastErrorOnly {
+ return errorLog[n]
+ }
+ return errorLog
+}
+
+// Error type represents list of errors in retry
+type Error []error
+
+// Error method return string representation of Error
+// It is an implementation of error interface
+func (e Error) Error() string {
+ logWithNumber := make([]string, lenWithoutNil(e))
+ for i, l := range e {
+ if l != nil {
+ logWithNumber[i] = fmt.Sprintf("#%d: %s", i+1, l.Error())
+ }
+ }
+
+ return fmt.Sprintf("All attempts fail:\n%s", strings.Join(logWithNumber, "\n"))
+}
+
+func lenWithoutNil(e Error) (count int) {
+ for _, v := range e {
+ if v != nil {
+ count++
+ }
+ }
+
+ return
+}
+
+// WrappedErrors returns the list of errors that this Error is wrapping.
+// It is an implementation of the `errwrap.Wrapper` interface
+// in package [errwrap](https://github.com/hashicorp/errwrap) so that
+// `retry.Error` can be used with that library.
+func (e Error) WrappedErrors() []error {
+ return e
+}
+
+type unrecoverableError struct {
+ error
+}
+
+// Unrecoverable wraps an error in `unrecoverableError` struct
+func Unrecoverable(err error) error {
+ return unrecoverableError{err}
+}
+
+// IsRecoverable checks if error is an instance of `unrecoverableError`
+func IsRecoverable(err error) bool {
+ _, isUnrecoverable := err.(unrecoverableError)
+ return !isUnrecoverable
+}
+
+func unpackUnrecoverable(err error) error {
+ if unrecoverable, isUnrecoverable := err.(unrecoverableError); isUnrecoverable {
+ return unrecoverable.error
+ }
+
+ return err
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/.gitignore b/vendor/github.com/bsm/sarama-cluster/.gitignore
new file mode 100644
index 00000000..88113c5b
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/.gitignore
@@ -0,0 +1,4 @@
+*.log
+*.pid
+kafka*/
+vendor/
diff --git a/vendor/github.com/bsm/sarama-cluster/.travis.yml b/vendor/github.com/bsm/sarama-cluster/.travis.yml
new file mode 100644
index 00000000..07c7c10f
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/.travis.yml
@@ -0,0 +1,18 @@
+sudo: false
+language: go
+go:
+ - 1.10.x
+ - 1.9.x
+install:
+ - go get -u github.com/golang/dep/cmd/dep
+ - dep ensure
+env:
+ - SCALA_VERSION=2.12 KAFKA_VERSION=0.11.0.1
+ - SCALA_VERSION=2.12 KAFKA_VERSION=1.0.1
+ - SCALA_VERSION=2.12 KAFKA_VERSION=1.1.0
+script:
+ - make default test-race
+addons:
+ apt:
+ packages:
+ - oracle-java8-set-default
diff --git a/vendor/github.com/bsm/sarama-cluster/Gopkg.lock b/vendor/github.com/bsm/sarama-cluster/Gopkg.lock
new file mode 100644
index 00000000..e1bc1102
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/Gopkg.lock
@@ -0,0 +1,151 @@
+# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
+
+
+[[projects]]
+ name = "github.com/Shopify/sarama"
+ packages = ["."]
+ revision = "35324cf48e33d8260e1c7c18854465a904ade249"
+ version = "v1.17.0"
+
+[[projects]]
+ name = "github.com/davecgh/go-spew"
+ packages = ["spew"]
+ revision = "346938d642f2ec3594ed81d874461961cd0faa76"
+ version = "v1.1.0"
+
+[[projects]]
+ name = "github.com/eapache/go-resiliency"
+ packages = ["breaker"]
+ revision = "ea41b0fad31007accc7f806884dcdf3da98b79ce"
+ version = "v1.1.0"
+
+[[projects]]
+ branch = "master"
+ name = "github.com/eapache/go-xerial-snappy"
+ packages = ["."]
+ revision = "bb955e01b9346ac19dc29eb16586c90ded99a98c"
+
+[[projects]]
+ name = "github.com/eapache/queue"
+ packages = ["."]
+ revision = "44cc805cf13205b55f69e14bcb69867d1ae92f98"
+ version = "v1.1.0"
+
+[[projects]]
+ branch = "master"
+ name = "github.com/golang/snappy"
+ packages = ["."]
+ revision = "2e65f85255dbc3072edf28d6b5b8efc472979f5a"
+
+[[projects]]
+ name = "github.com/onsi/ginkgo"
+ packages = [
+ ".",
+ "config",
+ "extensions/table",
+ "internal/codelocation",
+ "internal/containernode",
+ "internal/failer",
+ "internal/leafnodes",
+ "internal/remote",
+ "internal/spec",
+ "internal/spec_iterator",
+ "internal/specrunner",
+ "internal/suite",
+ "internal/testingtproxy",
+ "internal/writer",
+ "reporters",
+ "reporters/stenographer",
+ "reporters/stenographer/support/go-colorable",
+ "reporters/stenographer/support/go-isatty",
+ "types"
+ ]
+ revision = "fa5fabab2a1bfbd924faf4c067d07ae414e2aedf"
+ version = "v1.5.0"
+
+[[projects]]
+ name = "github.com/onsi/gomega"
+ packages = [
+ ".",
+ "format",
+ "internal/assertion",
+ "internal/asyncassertion",
+ "internal/oraclematcher",
+ "internal/testingtsupport",
+ "matchers",
+ "matchers/support/goraph/bipartitegraph",
+ "matchers/support/goraph/edge",
+ "matchers/support/goraph/node",
+ "matchers/support/goraph/util",
+ "types"
+ ]
+ revision = "62bff4df71bdbc266561a0caee19f0594b17c240"
+ version = "v1.4.0"
+
+[[projects]]
+ name = "github.com/pierrec/lz4"
+ packages = [
+ ".",
+ "internal/xxh32"
+ ]
+ revision = "6b9367c9ff401dbc54fabce3fb8d972e799b702d"
+ version = "v2.0.2"
+
+[[projects]]
+ branch = "master"
+ name = "github.com/rcrowley/go-metrics"
+ packages = ["."]
+ revision = "e2704e165165ec55d062f5919b4b29494e9fa790"
+
+[[projects]]
+ branch = "master"
+ name = "golang.org/x/net"
+ packages = [
+ "html",
+ "html/atom",
+ "html/charset"
+ ]
+ revision = "afe8f62b1d6bbd81f31868121a50b06d8188e1f9"
+
+[[projects]]
+ branch = "master"
+ name = "golang.org/x/sys"
+ packages = ["unix"]
+ revision = "63fc586f45fe72d95d5240a5d5eb95e6503907d3"
+
+[[projects]]
+ name = "golang.org/x/text"
+ packages = [
+ "encoding",
+ "encoding/charmap",
+ "encoding/htmlindex",
+ "encoding/internal",
+ "encoding/internal/identifier",
+ "encoding/japanese",
+ "encoding/korean",
+ "encoding/simplifiedchinese",
+ "encoding/traditionalchinese",
+ "encoding/unicode",
+ "internal/gen",
+ "internal/tag",
+ "internal/utf8internal",
+ "language",
+ "runes",
+ "transform",
+ "unicode/cldr"
+ ]
+ revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
+ version = "v0.3.0"
+
+[[projects]]
+ name = "gopkg.in/yaml.v2"
+ packages = ["."]
+ revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
+ version = "v2.2.1"
+
+[solve-meta]
+ analyzer-name = "dep"
+ analyzer-version = 1
+ inputs-digest = "2fa33a2d1ae87e0905ef09332bb4b3fda29179f6bcd48fd3b94070774b9e458b"
+ solver-name = "gps-cdcl"
+ solver-version = 1
diff --git a/vendor/github.com/bsm/sarama-cluster/Gopkg.toml b/vendor/github.com/bsm/sarama-cluster/Gopkg.toml
new file mode 100644
index 00000000..1eecfefc
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/Gopkg.toml
@@ -0,0 +1,26 @@
+
+# Gopkg.toml example
+#
+# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
+# for detailed Gopkg.toml documentation.
+#
+# required = ["github.com/user/thing/cmd/thing"]
+# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
+#
+# [[constraint]]
+# name = "github.com/user/project"
+# version = "1.0.0"
+#
+# [[constraint]]
+# name = "github.com/user/project2"
+# branch = "dev"
+# source = "github.com/myfork/project2"
+#
+# [[override]]
+# name = "github.com/x/y"
+# version = "2.4.0"
+
+
+[[constraint]]
+ name = "github.com/Shopify/sarama"
+ version = "^1.14.0"
diff --git a/vendor/github.com/bsm/sarama-cluster/LICENSE b/vendor/github.com/bsm/sarama-cluster/LICENSE
new file mode 100644
index 00000000..127751c4
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/LICENSE
@@ -0,0 +1,22 @@
+(The MIT License)
+
+Copyright (c) 2017 Black Square Media Ltd
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/bsm/sarama-cluster/Makefile b/vendor/github.com/bsm/sarama-cluster/Makefile
new file mode 100644
index 00000000..25c5bc20
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/Makefile
@@ -0,0 +1,35 @@
+SCALA_VERSION?= 2.12
+KAFKA_VERSION?= 1.1.0
+KAFKA_DIR= kafka_$(SCALA_VERSION)-$(KAFKA_VERSION)
+KAFKA_SRC= https://archive.apache.org/dist/kafka/$(KAFKA_VERSION)/$(KAFKA_DIR).tgz
+KAFKA_ROOT= testdata/$(KAFKA_DIR)
+PKG=$(shell go list ./... | grep -v vendor)
+
+default: vet test
+
+vet:
+ go vet $(PKG)
+
+test: testdeps
+ KAFKA_DIR=$(KAFKA_DIR) go test $(PKG) -ginkgo.slowSpecThreshold=60
+
+test-verbose: testdeps
+ KAFKA_DIR=$(KAFKA_DIR) go test $(PKG) -ginkgo.slowSpecThreshold=60 -v
+
+test-race: testdeps
+ KAFKA_DIR=$(KAFKA_DIR) go test $(PKG) -ginkgo.slowSpecThreshold=60 -v -race
+
+testdeps: $(KAFKA_ROOT)
+
+doc: README.md
+
+.PHONY: test testdeps vet doc
+
+# ---------------------------------------------------------------------
+
+$(KAFKA_ROOT):
+ @mkdir -p $(dir $@)
+ cd $(dir $@) && curl -sSL $(KAFKA_SRC) | tar xz
+
+README.md: README.md.tpl $(wildcard *.go)
+ becca -package $(subst $(GOPATH)/src/,,$(PWD))
diff --git a/vendor/github.com/bsm/sarama-cluster/README.md b/vendor/github.com/bsm/sarama-cluster/README.md
new file mode 100644
index 00000000..ebcd755d
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/README.md
@@ -0,0 +1,151 @@
+# Sarama Cluster
+
+[![GoDoc](https://godoc.org/github.com/bsm/sarama-cluster?status.svg)](https://godoc.org/github.com/bsm/sarama-cluster)
+[![Build Status](https://travis-ci.org/bsm/sarama-cluster.svg?branch=master)](https://travis-ci.org/bsm/sarama-cluster)
+[![Go Report Card](https://goreportcard.com/badge/github.com/bsm/sarama-cluster)](https://goreportcard.com/report/github.com/bsm/sarama-cluster)
+[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+
+Cluster extensions for [Sarama](https://github.com/Shopify/sarama), the Go client library for Apache Kafka 0.9 (and later).
+
+## Documentation
+
+Documentation and example are available via godoc at http://godoc.org/github.com/bsm/sarama-cluster
+
+## Examples
+
+Consumers have two modes of operation. In the default multiplexed mode messages (and errors) of multiple
+topics and partitions are all passed to the single channel:
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "os/signal"
+
+ cluster "github.com/bsm/sarama-cluster"
+)
+
+func main() {
+
+ // init (custom) config, enable errors and notifications
+ config := cluster.NewConfig()
+ config.Consumer.Return.Errors = true
+ config.Group.Return.Notifications = true
+
+ // init consumer
+ brokers := []string{"127.0.0.1:9092"}
+ topics := []string{"my_topic", "other_topic"}
+ consumer, err := cluster.NewConsumer(brokers, "my-consumer-group", topics, config)
+ if err != nil {
+ panic(err)
+ }
+ defer consumer.Close()
+
+ // trap SIGINT to trigger a shutdown.
+ signals := make(chan os.Signal, 1)
+ signal.Notify(signals, os.Interrupt)
+
+ // consume errors
+ go func() {
+ for err := range consumer.Errors() {
+ log.Printf("Error: %s\n", err.Error())
+ }
+ }()
+
+ // consume notifications
+ go func() {
+ for ntf := range consumer.Notifications() {
+ log.Printf("Rebalanced: %+v\n", ntf)
+ }
+ }()
+
+ // consume messages, watch signals
+ for {
+ select {
+ case msg, ok := <-consumer.Messages():
+ if ok {
+ fmt.Fprintf(os.Stdout, "%s/%d/%d\t%s\t%s\n", msg.Topic, msg.Partition, msg.Offset, msg.Key, msg.Value)
+ consumer.MarkOffset(msg, "") // mark message as processed
+ }
+ case <-signals:
+ return
+ }
+ }
+}
+```
+
+Users who require access to individual partitions can use the partitioned mode which exposes access to partition-level
+consumers:
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "os/signal"
+
+ cluster "github.com/bsm/sarama-cluster"
+)
+
+func main() {
+
+ // init (custom) config, set mode to ConsumerModePartitions
+ config := cluster.NewConfig()
+ config.Group.Mode = cluster.ConsumerModePartitions
+
+ // init consumer
+ brokers := []string{"127.0.0.1:9092"}
+ topics := []string{"my_topic", "other_topic"}
+ consumer, err := cluster.NewConsumer(brokers, "my-consumer-group", topics, config)
+ if err != nil {
+ panic(err)
+ }
+ defer consumer.Close()
+
+ // trap SIGINT to trigger a shutdown.
+ signals := make(chan os.Signal, 1)
+ signal.Notify(signals, os.Interrupt)
+
+ // consume partitions
+ for {
+ select {
+ case part, ok := <-consumer.Partitions():
+ if !ok {
+ return
+ }
+
+ // start a separate goroutine to consume messages
+ go func(pc cluster.PartitionConsumer) {
+ for msg := range pc.Messages() {
+ fmt.Fprintf(os.Stdout, "%s/%d/%d\t%s\t%s\n", msg.Topic, msg.Partition, msg.Offset, msg.Key, msg.Value)
+ consumer.MarkOffset(msg, "") // mark message as processed
+ }
+ }(part)
+ case <-signals:
+ return
+ }
+ }
+}
+```
+
+## Running tests
+
+You need to install Ginkgo & Gomega to run tests. Please see
+http://onsi.github.io/ginkgo for more details.
+
+To run tests, call:
+
+ $ make test
+
+## Troubleshooting
+
+### Consumer not receiving any messages?
+
+By default, sarama's `Config.Consumer.Offsets.Initial` is set to `sarama.OffsetNewest`. This means that in the event that a brand new consumer is created, and it has never committed any offsets to kafka, it will only receive messages starting from the message after the current one that was written.
+
+If you wish to receive all messages (from the start of all messages in the topic) in the event that a consumer does not have any offsets committed to kafka, you need to set `Config.Consumer.Offsets.Initial` to `sarama.OffsetOldest`.
diff --git a/vendor/github.com/bsm/sarama-cluster/README.md.tpl b/vendor/github.com/bsm/sarama-cluster/README.md.tpl
new file mode 100644
index 00000000..5f63a690
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/README.md.tpl
@@ -0,0 +1,67 @@
+# Sarama Cluster
+
+[![GoDoc](https://godoc.org/github.com/bsm/sarama-cluster?status.svg)](https://godoc.org/github.com/bsm/sarama-cluster)
+[![Build Status](https://travis-ci.org/bsm/sarama-cluster.svg?branch=master)](https://travis-ci.org/bsm/sarama-cluster)
+[![Go Report Card](https://goreportcard.com/badge/github.com/bsm/sarama-cluster)](https://goreportcard.com/report/github.com/bsm/sarama-cluster)
+[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+
+Cluster extensions for [Sarama](https://github.com/Shopify/sarama), the Go client library for Apache Kafka 0.9 (and later).
+
+## Documentation
+
+Documentation and example are available via godoc at http://godoc.org/github.com/bsm/sarama-cluster
+
+## Examples
+
+Consumers have two modes of operation. In the default multiplexed mode messages (and errors) of multiple
+topics and partitions are all passed to the single channel:
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "os/signal"
+
+ cluster "github.com/bsm/sarama-cluster"
+)
+
+func main() {{ "ExampleConsumer" | code }}
+```
+
+Users who require access to individual partitions can use the partitioned mode which exposes access to partition-level
+consumers:
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "os/signal"
+
+ cluster "github.com/bsm/sarama-cluster"
+)
+
+func main() {{ "ExampleConsumer_Partitions" | code }}
+```
+
+## Running tests
+
+You need to install Ginkgo & Gomega to run tests. Please see
+http://onsi.github.io/ginkgo for more details.
+
+To run tests, call:
+
+ $ make test
+
+## Troubleshooting
+
+### Consumer not receiving any messages?
+
+By default, sarama's `Config.Consumer.Offsets.Initial` is set to `sarama.OffsetNewest`. This means that in the event that a brand new consumer is created, and it has never committed any offsets to kafka, it will only receive messages starting from the message after the current one that was written.
+
+If you wish to receive all messages (from the start of all messages in the topic) in the event that a consumer does not have any offsets committed to kafka, you need to set `Config.Consumer.Offsets.Initial` to `sarama.OffsetOldest`.
diff --git a/vendor/github.com/bsm/sarama-cluster/balancer.go b/vendor/github.com/bsm/sarama-cluster/balancer.go
new file mode 100644
index 00000000..3aeaecef
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/balancer.go
@@ -0,0 +1,170 @@
+package cluster
+
+import (
+ "math"
+ "sort"
+
+ "github.com/Shopify/sarama"
+)
+
+// NotificationType defines the type of notification
+type NotificationType uint8
+
+// String describes the notification type
+func (t NotificationType) String() string {
+ switch t {
+ case RebalanceStart:
+ return "rebalance start"
+ case RebalanceOK:
+ return "rebalance OK"
+ case RebalanceError:
+ return "rebalance error"
+ }
+ return "unknown"
+}
+
+const (
+ UnknownNotification NotificationType = iota
+ RebalanceStart
+ RebalanceOK
+ RebalanceError
+)
+
+// Notification are state events emitted by the consumers on rebalance
+type Notification struct {
+ // Type exposes the notification type
+ Type NotificationType
+
+ // Claimed contains topic/partitions that were claimed by this rebalance cycle
+ Claimed map[string][]int32
+
+ // Released contains topic/partitions that were released as part of this rebalance cycle
+ Released map[string][]int32
+
+ // Current are topic/partitions that are currently claimed to the consumer
+ Current map[string][]int32
+}
+
+func newNotification(current map[string][]int32) *Notification {
+ return &Notification{
+ Type: RebalanceStart,
+ Current: current,
+ }
+}
+
+func (n *Notification) success(current map[string][]int32) *Notification {
+ o := &Notification{
+ Type: RebalanceOK,
+ Claimed: make(map[string][]int32),
+ Released: make(map[string][]int32),
+ Current: current,
+ }
+ for topic, partitions := range current {
+ o.Claimed[topic] = int32Slice(partitions).Diff(int32Slice(n.Current[topic]))
+ }
+ for topic, partitions := range n.Current {
+ o.Released[topic] = int32Slice(partitions).Diff(int32Slice(current[topic]))
+ }
+ return o
+}
+
+// --------------------------------------------------------------------
+
+type topicInfo struct {
+ Partitions []int32
+ MemberIDs []string
+}
+
+func (info topicInfo) Perform(s Strategy) map[string][]int32 {
+ if s == StrategyRoundRobin {
+ return info.RoundRobin()
+ }
+ return info.Ranges()
+}
+
+func (info topicInfo) Ranges() map[string][]int32 {
+ sort.Strings(info.MemberIDs)
+
+ mlen := len(info.MemberIDs)
+ plen := len(info.Partitions)
+ res := make(map[string][]int32, mlen)
+
+ for pos, memberID := range info.MemberIDs {
+ n, i := float64(plen)/float64(mlen), float64(pos)
+ min := int(math.Floor(i*n + 0.5))
+ max := int(math.Floor((i+1)*n + 0.5))
+ sub := info.Partitions[min:max]
+ if len(sub) > 0 {
+ res[memberID] = sub
+ }
+ }
+ return res
+}
+
+func (info topicInfo) RoundRobin() map[string][]int32 {
+ sort.Strings(info.MemberIDs)
+
+ mlen := len(info.MemberIDs)
+ res := make(map[string][]int32, mlen)
+ for i, pnum := range info.Partitions {
+ memberID := info.MemberIDs[i%mlen]
+ res[memberID] = append(res[memberID], pnum)
+ }
+ return res
+}
+
+// --------------------------------------------------------------------
+
+type balancer struct {
+ client sarama.Client
+ topics map[string]topicInfo
+}
+
+func newBalancerFromMeta(client sarama.Client, members map[string]sarama.ConsumerGroupMemberMetadata) (*balancer, error) {
+ balancer := newBalancer(client)
+ for memberID, meta := range members {
+ for _, topic := range meta.Topics {
+ if err := balancer.Topic(topic, memberID); err != nil {
+ return nil, err
+ }
+ }
+ }
+ return balancer, nil
+}
+
+func newBalancer(client sarama.Client) *balancer {
+ return &balancer{
+ client: client,
+ topics: make(map[string]topicInfo),
+ }
+}
+
+func (r *balancer) Topic(name string, memberID string) error {
+ topic, ok := r.topics[name]
+ if !ok {
+ nums, err := r.client.Partitions(name)
+ if err != nil {
+ return err
+ }
+ topic = topicInfo{
+ Partitions: nums,
+ MemberIDs: make([]string, 0, 1),
+ }
+ }
+ topic.MemberIDs = append(topic.MemberIDs, memberID)
+ r.topics[name] = topic
+ return nil
+}
+
+func (r *balancer) Perform(s Strategy) map[string]map[string][]int32 {
+ res := make(map[string]map[string][]int32, 1)
+ for topic, info := range r.topics {
+ for memberID, partitions := range info.Perform(s) {
+ if _, ok := res[memberID]; !ok {
+ res[memberID] = make(map[string][]int32, 1)
+ }
+ res[memberID][topic] = partitions
+ }
+ }
+ return res
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/client.go b/vendor/github.com/bsm/sarama-cluster/client.go
new file mode 100644
index 00000000..42ffb30c
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/client.go
@@ -0,0 +1,50 @@
+package cluster
+
+import (
+ "errors"
+ "sync/atomic"
+
+ "github.com/Shopify/sarama"
+)
+
+var errClientInUse = errors.New("cluster: client is already used by another consumer")
+
+// Client is a group client
+type Client struct {
+ sarama.Client
+ config Config
+
+ inUse uint32
+}
+
+// NewClient creates a new client instance
+func NewClient(addrs []string, config *Config) (*Client, error) {
+ if config == nil {
+ config = NewConfig()
+ }
+
+ if err := config.Validate(); err != nil {
+ return nil, err
+ }
+
+ client, err := sarama.NewClient(addrs, &config.Config)
+ if err != nil {
+ return nil, err
+ }
+
+ return &Client{Client: client, config: *config}, nil
+}
+
+// ClusterConfig returns the cluster configuration.
+func (c *Client) ClusterConfig() *Config {
+ cfg := c.config
+ return &cfg
+}
+
+func (c *Client) claim() bool {
+ return atomic.CompareAndSwapUint32(&c.inUse, 0, 1)
+}
+
+func (c *Client) release() {
+ atomic.CompareAndSwapUint32(&c.inUse, 1, 0)
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/cluster.go b/vendor/github.com/bsm/sarama-cluster/cluster.go
new file mode 100644
index 00000000..adcf0e9c
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/cluster.go
@@ -0,0 +1,25 @@
+package cluster
+
+// Strategy for partition to consumer assignment
+type Strategy string
+
+const (
+ // StrategyRange is the default and assigns partition ranges to consumers.
+ // Example with six partitions and two consumers:
+ // C1: [0, 1, 2]
+ // C2: [3, 4, 5]
+ StrategyRange Strategy = "range"
+
+ // StrategyRoundRobin assigns partitions by alternating over consumers.
+ // Example with six partitions and two consumers:
+ // C1: [0, 2, 4]
+ // C2: [1, 3, 5]
+ StrategyRoundRobin Strategy = "roundrobin"
+)
+
+// Error instances are wrappers for internal errors with a context and
+// may be returned through the consumer's Errors() channel
+type Error struct {
+ Ctx string
+ error
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/config.go b/vendor/github.com/bsm/sarama-cluster/config.go
new file mode 100644
index 00000000..084b835f
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/config.go
@@ -0,0 +1,146 @@
+package cluster
+
+import (
+ "regexp"
+ "time"
+
+ "github.com/Shopify/sarama"
+)
+
+var minVersion = sarama.V0_9_0_0
+
+type ConsumerMode uint8
+
+const (
+ ConsumerModeMultiplex ConsumerMode = iota
+ ConsumerModePartitions
+)
+
+// Config extends sarama.Config with Group specific namespace
+type Config struct {
+ sarama.Config
+
+ // Group is the namespace for group management properties
+ Group struct {
+
+ // The strategy to use for the allocation of partitions to consumers (defaults to StrategyRange)
+ PartitionStrategy Strategy
+
+ // By default, messages and errors from the subscribed topics and partitions are all multiplexed and
+ // made available through the consumer's Messages() and Errors() channels.
+ //
+ // Users who require low-level access can enable ConsumerModePartitions where individual partitions
+ // are exposed on the Partitions() channel. Messages and errors must then be consumed on the partitions
+ // themselves.
+ Mode ConsumerMode
+
+ Offsets struct {
+ Retry struct {
+ // The number of retries when committing offsets (defaults to 3).
+ Max int
+ }
+ Synchronization struct {
+ // The duration allowed for other clients to commit their offsets before resumption in this client, e.g. during a rebalance.
+ // NewConfig sets this to the Consumer.MaxProcessingTime duration of the Sarama configuration.
+ DwellTime time.Duration
+ }
+ }
+
+ Session struct {
+ // The allowed session timeout for registered consumers (defaults to 30s).
+ // Must be within the allowed server range.
+ Timeout time.Duration
+ }
+
+ Heartbeat struct {
+ // Interval between each heartbeat (defaults to 3s). It should be no more
+ // than 1/3rd of the Group.Session.Timeout setting.
+ Interval time.Duration
+ }
+
+ // Return specifies which group channels will be populated. If they are set to true,
+ // you must read from the respective channels to prevent deadlock.
+ Return struct {
+ // If enabled, rebalance notifications will be returned on the
+ // Notifications channel (default disabled).
+ Notifications bool
+ }
+
+ Topics struct {
+ // An additional whitelist of topics to subscribe to.
+ Whitelist *regexp.Regexp
+ // An additional blacklist of topics to avoid. If set, this takes precedence
+ // over the Whitelist setting.
+ Blacklist *regexp.Regexp
+ }
+
+ Member struct {
+ // Custom metadata to include when joining the group. The user data for all joined members
+ // can be retrieved by sending a DescribeGroupRequest to the broker that is the
+ // coordinator for the group.
+ UserData []byte
+ }
+ }
+}
+
+// NewConfig returns a new configuration instance with sane defaults.
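+//
+// A minimal usage sketch (illustrative, not part of the vendored code):
+//
+//	config := NewConfig()
+//	config.Group.PartitionStrategy = StrategyRoundRobin
+//	config.Group.Return.Notifications = true
+//	config.Consumer.Offsets.Initial = sarama.OffsetOldest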
+func NewConfig() *Config {
+ c := &Config{
+ Config: *sarama.NewConfig(),
+ }
+ c.Group.PartitionStrategy = StrategyRange
+ c.Group.Offsets.Retry.Max = 3
+ c.Group.Offsets.Synchronization.DwellTime = c.Consumer.MaxProcessingTime
+ c.Group.Session.Timeout = 30 * time.Second
+ c.Group.Heartbeat.Interval = 3 * time.Second
+ c.Config.Version = minVersion
+ return c
+}
+
+// Validate checks a Config instance. It will return a
+// sarama.ConfigurationError if the specified values don't make sense.
+func (c *Config) Validate() error {
+ if c.Group.Heartbeat.Interval%time.Millisecond != 0 {
+ sarama.Logger.Println("Group.Heartbeat.Interval only supports millisecond precision; nanoseconds will be truncated.")
+ }
+ if c.Group.Session.Timeout%time.Millisecond != 0 {
+ sarama.Logger.Println("Group.Session.Timeout only supports millisecond precision; nanoseconds will be truncated.")
+ }
+ if c.Group.PartitionStrategy != StrategyRange && c.Group.PartitionStrategy != StrategyRoundRobin {
+ sarama.Logger.Println("Group.PartitionStrategy is not supported; range will be assumed.")
+ }
+ if !c.Version.IsAtLeast(minVersion) {
+ sarama.Logger.Println("Version is not supported; 0.9.0.0 will be assumed.")
+ c.Version = minVersion
+ }
+ if err := c.Config.Validate(); err != nil {
+ return err
+ }
+
+ // validate the Group values
+ switch {
+ case c.Group.Offsets.Retry.Max < 0:
+ return sarama.ConfigurationError("Group.Offsets.Retry.Max must be >= 0")
+ case c.Group.Offsets.Synchronization.DwellTime <= 0:
+ return sarama.ConfigurationError("Group.Offsets.Synchronization.DwellTime must be > 0")
+ case c.Group.Offsets.Synchronization.DwellTime > 10*time.Minute:
+ return sarama.ConfigurationError("Group.Offsets.Synchronization.DwellTime must be <= 10m")
+ case c.Group.Heartbeat.Interval <= 0:
+ return sarama.ConfigurationError("Group.Heartbeat.Interval must be > 0")
+ case c.Group.Session.Timeout <= 0:
+ return sarama.ConfigurationError("Group.Session.Timeout must be > 0")
+ case !c.Metadata.Full && c.Group.Topics.Whitelist != nil:
+ return sarama.ConfigurationError("Metadata.Full must be enabled when Group.Topics.Whitelist is used")
+ case !c.Metadata.Full && c.Group.Topics.Blacklist != nil:
+ return sarama.ConfigurationError("Metadata.Full must be enabled when Group.Topics.Blacklist is used")
+ }
+
+ // ensure offset is correct
+ switch c.Consumer.Offsets.Initial {
+ case sarama.OffsetOldest, sarama.OffsetNewest:
+ default:
+ return sarama.ConfigurationError("Consumer.Offsets.Initial must be either OffsetOldest or OffsetNewest")
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/consumer.go b/vendor/github.com/bsm/sarama-cluster/consumer.go
new file mode 100644
index 00000000..e7a67dac
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/consumer.go
@@ -0,0 +1,919 @@
+package cluster
+
+import (
+ "sort"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/Shopify/sarama"
+)
+
+// Consumer is a cluster group consumer
+type Consumer struct {
+ client *Client
+ ownClient bool
+
+ consumer sarama.Consumer
+ subs *partitionMap
+
+ consumerID string
+ groupID string
+
+ memberID string
+ generationID int32
+ membershipMu sync.RWMutex
+
+ coreTopics []string
+ extraTopics []string
+
+ dying, dead chan none
+ closeOnce sync.Once
+
+ consuming int32
+ messages chan *sarama.ConsumerMessage
+ errors chan error
+ partitions chan PartitionConsumer
+ notifications chan *Notification
+
+ commitMu sync.Mutex
+}
+
+// NewConsumer initializes a new consumer
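+//
+// A minimal usage sketch (illustrative; the broker address, group and topic
+// names are placeholders, not part of the vendored code):
+//
+//	consumer, err := NewConsumer([]string{"localhost:9092"}, "my-group", []string{"my-topic"}, nil)
+//	if err != nil {
+//		// handle error
+//	}
+//	defer consumer.Close()
+//	for msg := range consumer.Messages() {
+//		consumer.MarkOffset(msg, "") // mark the message as processed
+//	}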
+func NewConsumer(addrs []string, groupID string, topics []string, config *Config) (*Consumer, error) {
+ client, err := NewClient(addrs, config)
+ if err != nil {
+ return nil, err
+ }
+
+ consumer, err := NewConsumerFromClient(client, groupID, topics)
+ if err != nil {
+ return nil, err
+ }
+ consumer.ownClient = true
+ return consumer, nil
+}
+
+// NewConsumerFromClient initializes a new consumer from an existing client.
+//
+// Please note that clients cannot be shared between consumers (due to Kafka internals);
+// they can only be re-used, which requires the user to call Close() on the first consumer
+// before using this method again to initialize another one. Attempts to use a client with
+// more than one consumer at a time will return errors.
+func NewConsumerFromClient(client *Client, groupID string, topics []string) (*Consumer, error) {
+ if !client.claim() {
+ return nil, errClientInUse
+ }
+
+ consumer, err := sarama.NewConsumerFromClient(client.Client)
+ if err != nil {
+ client.release()
+ return nil, err
+ }
+
+ sort.Strings(topics)
+ c := &Consumer{
+ client: client,
+ consumer: consumer,
+ subs: newPartitionMap(),
+ groupID: groupID,
+
+ coreTopics: topics,
+
+ dying: make(chan none),
+ dead: make(chan none),
+
+ messages: make(chan *sarama.ConsumerMessage),
+ errors: make(chan error, client.config.ChannelBufferSize),
+ partitions: make(chan PartitionConsumer, 1),
+ notifications: make(chan *Notification),
+ }
+ if err := c.client.RefreshCoordinator(groupID); err != nil {
+ client.release()
+ return nil, err
+ }
+
+ go c.mainLoop()
+ return c, nil
+}
+
+// Messages returns the read channel for the messages that are returned by
+// the broker.
+//
+// This channel will only return if Config.Group.Mode option is set to
+// ConsumerModeMultiplex (default).
+func (c *Consumer) Messages() <-chan *sarama.ConsumerMessage { return c.messages }
+
+// Partitions returns the read channels for individual partitions of this broker.
+//
+// This channel will only return if the Config.Group.Mode option is set to
+// ConsumerModePartitions.
+//
+// The Partitions() channel must be listened to for the life of this consumer;
+// when a rebalance happens old partitions will be closed (naturally come to
+// completion) and new ones will be emitted. The returned channel will only close
+// when the consumer is completely shut down.
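+//
+// Illustrative sketch (not part of the vendored code), assuming
+// Config.Group.Mode has been set to ConsumerModePartitions:
+//
+//	for pc := range consumer.Partitions() {
+//		go func(pc PartitionConsumer) {
+//			defer pc.Close()
+//			for msg := range pc.Messages() {
+//				consumer.MarkOffset(msg, "")
+//			}
+//		}(pc)
+//	}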
+func (c *Consumer) Partitions() <-chan PartitionConsumer { return c.partitions }
+
+// Errors returns a read channel of errors that occur during offset management, if
+// enabled. By default, errors are logged and not returned over this channel. If
+// you want to implement any custom error handling, set your config's
+// Consumer.Return.Errors setting to true, and read from this channel.
+func (c *Consumer) Errors() <-chan error { return c.errors }
+
+// Notifications returns a channel of Notifications that occur during consumer
+// rebalancing. Notifications will only be emitted over this channel if your config's
+// Group.Return.Notifications setting is set to true.
+func (c *Consumer) Notifications() <-chan *Notification { return c.notifications }
+
+// HighWaterMarks returns the current high water marks for each topic and partition
+// Consistency between partitions is not guaranteed since high water marks are updated separately.
+func (c *Consumer) HighWaterMarks() map[string]map[int32]int64 { return c.consumer.HighWaterMarks() }
+
+// MarkOffset marks the provided message as processed, alongside a metadata string
+// that represents the state of the partition consumer at that point in time. The
+// metadata string can be used by another consumer to restore that state, so it
+// can resume consumption.
+//
+// Note: calling MarkOffset does not necessarily commit the offset to the backend
+// store immediately for efficiency reasons, and it may never be committed if
+// your application crashes. This means that you may end up processing the same
+// message twice, and your processing should ideally be idempotent.
+func (c *Consumer) MarkOffset(msg *sarama.ConsumerMessage, metadata string) {
+ if sub := c.subs.Fetch(msg.Topic, msg.Partition); sub != nil {
+ sub.MarkOffset(msg.Offset, metadata)
+ }
+}
+
+// MarkPartitionOffset marks an offset of the provided topic/partition as processed.
+// See MarkOffset for additional explanation.
+func (c *Consumer) MarkPartitionOffset(topic string, partition int32, offset int64, metadata string) {
+ if sub := c.subs.Fetch(topic, partition); sub != nil {
+ sub.MarkOffset(offset, metadata)
+ }
+}
+
+// MarkOffsets marks stashed offsets as processed.
+// See MarkOffset for additional explanation.
+func (c *Consumer) MarkOffsets(s *OffsetStash) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ for tp, info := range s.offsets {
+ if sub := c.subs.Fetch(tp.Topic, tp.Partition); sub != nil {
+ sub.MarkOffset(info.Offset, info.Metadata)
+ }
+ delete(s.offsets, tp)
+ }
+}
+
+// ResetOffset marks the provided message as processed, alongside a metadata string
+// that represents the state of the partition consumer at that point in time. The
+// metadata string can be used by another consumer to restore that state, so it
+// can resume consumption.
+//
+// The difference between ResetOffset and MarkOffset is that ResetOffset allows rewinding to an earlier offset.
+func (c *Consumer) ResetOffset(msg *sarama.ConsumerMessage, metadata string) {
+ if sub := c.subs.Fetch(msg.Topic, msg.Partition); sub != nil {
+ sub.ResetOffset(msg.Offset, metadata)
+ }
+}
+
+// ResetPartitionOffset marks an offset of the provided topic/partition as processed.
+// See ResetOffset for additional explanation.
+func (c *Consumer) ResetPartitionOffset(topic string, partition int32, offset int64, metadata string) {
+ sub := c.subs.Fetch(topic, partition)
+ if sub != nil {
+ sub.ResetOffset(offset, metadata)
+ }
+}
+
+// ResetOffsets marks stashed offsets as processed.
+// See ResetOffset for additional explanation.
+func (c *Consumer) ResetOffsets(s *OffsetStash) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ for tp, info := range s.offsets {
+ if sub := c.subs.Fetch(tp.Topic, tp.Partition); sub != nil {
+ sub.ResetOffset(info.Offset, info.Metadata)
+ }
+ delete(s.offsets, tp)
+ }
+}
+
+// Subscriptions returns the consumed topics and partitions
+func (c *Consumer) Subscriptions() map[string][]int32 {
+ return c.subs.Info()
+}
+
+// CommitOffsets allows manually committing previously marked offsets. By default there is no
+// need to call this function as the consumer will commit offsets automatically
+// using the Config.Consumer.Offsets.CommitInterval setting.
+//
+// Please be aware that calling this function during an internal rebalance cycle may return
+// broker errors (e.g. sarama.ErrUnknownMemberId or sarama.ErrIllegalGeneration).
+func (c *Consumer) CommitOffsets() error {
+ c.commitMu.Lock()
+ defer c.commitMu.Unlock()
+
+ memberID, generationID := c.membership()
+ req := &sarama.OffsetCommitRequest{
+ Version: 2,
+ ConsumerGroup: c.groupID,
+ ConsumerGroupGeneration: generationID,
+ ConsumerID: memberID,
+ RetentionTime: -1,
+ }
+
+ if ns := c.client.config.Consumer.Offsets.Retention; ns != 0 {
+ req.RetentionTime = int64(ns / time.Millisecond)
+ }
+
+ snap := c.subs.Snapshot()
+ dirty := false
+ for tp, state := range snap {
+ if state.Dirty {
+ dirty = true
+ req.AddBlock(tp.Topic, tp.Partition, state.Info.Offset, 0, state.Info.Metadata)
+ }
+ }
+ if !dirty {
+ return nil
+ }
+
+ broker, err := c.client.Coordinator(c.groupID)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return err
+ }
+
+ resp, err := broker.CommitOffset(req)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return err
+ }
+
+ for topic, errs := range resp.Errors {
+ for partition, kerr := range errs {
+ if kerr != sarama.ErrNoError {
+ err = kerr
+ } else if state, ok := snap[topicPartition{topic, partition}]; ok {
+ if sub := c.subs.Fetch(topic, partition); sub != nil {
+ sub.markCommitted(state.Info.Offset)
+ }
+ }
+ }
+ }
+ return err
+}
+
+// Close safely closes the consumer and releases all resources
+func (c *Consumer) Close() (err error) {
+ c.closeOnce.Do(func() {
+ close(c.dying)
+ <-c.dead
+
+ if e := c.release(); e != nil {
+ err = e
+ }
+ if e := c.consumer.Close(); e != nil {
+ err = e
+ }
+ close(c.messages)
+ close(c.errors)
+
+ if e := c.leaveGroup(); e != nil {
+ err = e
+ }
+ close(c.partitions)
+ close(c.notifications)
+
+ // drain
+ for range c.messages {
+ }
+ for range c.errors {
+ }
+ for p := range c.partitions {
+ _ = p.Close()
+ }
+ for range c.notifications {
+ }
+
+ c.client.release()
+ if c.ownClient {
+ if e := c.client.Close(); e != nil {
+ err = e
+ }
+ }
+ })
+ return
+}
+
+func (c *Consumer) mainLoop() {
+ defer close(c.dead)
+ defer atomic.StoreInt32(&c.consuming, 0)
+
+ for {
+ atomic.StoreInt32(&c.consuming, 0)
+
+ // Check if close was requested
+ select {
+ case <-c.dying:
+ return
+ default:
+ }
+
+ // Start next consume cycle
+ c.nextTick()
+ }
+}
+
+func (c *Consumer) nextTick() {
+ // Remember previous subscriptions
+ var notification *Notification
+ if c.client.config.Group.Return.Notifications {
+ notification = newNotification(c.subs.Info())
+ }
+
+ // Refresh coordinator
+ if err := c.refreshCoordinator(); err != nil {
+ c.rebalanceError(err, nil)
+ return
+ }
+
+ // Release subscriptions
+ if err := c.release(); err != nil {
+ c.rebalanceError(err, nil)
+ return
+ }
+
+ // Issue rebalance start notification
+ if c.client.config.Group.Return.Notifications {
+ c.handleNotification(notification)
+ }
+
+ // Rebalance, fetch new subscriptions
+ subs, err := c.rebalance()
+ if err != nil {
+ c.rebalanceError(err, notification)
+ return
+ }
+
+ // Coordinate loops, make sure everything is
+ // stopped on exit
+ tomb := newLoopTomb()
+ defer tomb.Close()
+
+ // Start the heartbeat
+ tomb.Go(c.hbLoop)
+
+ // Subscribe to topic/partitions
+ if err := c.subscribe(tomb, subs); err != nil {
+ c.rebalanceError(err, notification)
+ return
+ }
+
+ // Update/issue notification with new claims
+ if c.client.config.Group.Return.Notifications {
+ notification = notification.success(subs)
+ c.handleNotification(notification)
+ }
+
+ // Start topic watcher loop
+ tomb.Go(c.twLoop)
+
+ // Start consuming and committing offsets
+ tomb.Go(c.cmLoop)
+ atomic.StoreInt32(&c.consuming, 1)
+
+ // Wait for signals
+ select {
+ case <-tomb.Dying():
+ case <-c.dying:
+ }
+}
+
+// heartbeat loop, triggered by the mainLoop
+func (c *Consumer) hbLoop(stopped <-chan none) {
+ ticker := time.NewTicker(c.client.config.Group.Heartbeat.Interval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-ticker.C:
+ switch err := c.heartbeat(); err {
+ case nil, sarama.ErrNoError:
+ case sarama.ErrNotCoordinatorForConsumer, sarama.ErrRebalanceInProgress:
+ return
+ default:
+ c.handleError(&Error{Ctx: "heartbeat", error: err})
+ return
+ }
+ case <-stopped:
+ return
+ case <-c.dying:
+ return
+ }
+ }
+}
+
+// topic watcher loop, triggered by the mainLoop
+func (c *Consumer) twLoop(stopped <-chan none) {
+ ticker := time.NewTicker(c.client.config.Metadata.RefreshFrequency / 2)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-ticker.C:
+ topics, err := c.client.Topics()
+ if err != nil {
+ c.handleError(&Error{Ctx: "topics", error: err})
+ return
+ }
+
+ for _, topic := range topics {
+ if !c.isKnownCoreTopic(topic) &&
+ !c.isKnownExtraTopic(topic) &&
+ c.isPotentialExtraTopic(topic) {
+ return
+ }
+ }
+ case <-stopped:
+ return
+ case <-c.dying:
+ return
+ }
+ }
+}
+
+// commit loop, triggered by the mainLoop
+func (c *Consumer) cmLoop(stopped <-chan none) {
+ ticker := time.NewTicker(c.client.config.Consumer.Offsets.CommitInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-ticker.C:
+ if err := c.commitOffsetsWithRetry(c.client.config.Group.Offsets.Retry.Max); err != nil {
+ c.handleError(&Error{Ctx: "commit", error: err})
+ return
+ }
+ case <-stopped:
+ return
+ case <-c.dying:
+ return
+ }
+ }
+}
+
+func (c *Consumer) rebalanceError(err error, n *Notification) {
+ if n != nil {
+ n.Type = RebalanceError
+ c.handleNotification(n)
+ }
+
+ switch err {
+ case sarama.ErrRebalanceInProgress:
+ default:
+ c.handleError(&Error{Ctx: "rebalance", error: err})
+ }
+
+ select {
+ case <-c.dying:
+ case <-time.After(c.client.config.Metadata.Retry.Backoff):
+ }
+}
+
+func (c *Consumer) handleNotification(n *Notification) {
+ if c.client.config.Group.Return.Notifications {
+ select {
+ case c.notifications <- n:
+ case <-c.dying:
+ return
+ }
+ }
+}
+
+func (c *Consumer) handleError(e *Error) {
+ if c.client.config.Consumer.Return.Errors {
+ select {
+ case c.errors <- e:
+ case <-c.dying:
+ return
+ }
+ } else {
+ sarama.Logger.Printf("%s error: %s\n", e.Ctx, e.Error())
+ }
+}
+
+// Releases the consumer and commits offsets, called from rebalance() and Close()
+func (c *Consumer) release() (err error) {
+ // Stop all consumers
+ c.subs.Stop()
+
+ // Clear subscriptions on exit
+ defer c.subs.Clear()
+
+ // Wait for messages to be processed
+ timeout := time.NewTimer(c.client.config.Group.Offsets.Synchronization.DwellTime)
+ defer timeout.Stop()
+
+ select {
+ case <-c.dying:
+ case <-timeout.C:
+ }
+
+ // Commit offsets, continue on errors
+ if e := c.commitOffsetsWithRetry(c.client.config.Group.Offsets.Retry.Max); e != nil {
+ err = e
+ }
+
+ return
+}
+
+// --------------------------------------------------------------------
+
+// Performs a heartbeat, part of the mainLoop()
+func (c *Consumer) heartbeat() error {
+ broker, err := c.client.Coordinator(c.groupID)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return err
+ }
+
+ memberID, generationID := c.membership()
+ resp, err := broker.Heartbeat(&sarama.HeartbeatRequest{
+ GroupId: c.groupID,
+ MemberId: memberID,
+ GenerationId: generationID,
+ })
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return err
+ }
+ return resp.Err
+}
+
+// Performs a rebalance, part of the mainLoop()
+func (c *Consumer) rebalance() (map[string][]int32, error) {
+ memberID, _ := c.membership()
+ sarama.Logger.Printf("cluster/consumer %s rebalance\n", memberID)
+
+ allTopics, err := c.client.Topics()
+ if err != nil {
+ return nil, err
+ }
+ c.extraTopics = c.selectExtraTopics(allTopics)
+ sort.Strings(c.extraTopics)
+
+ // Re-join consumer group
+ strategy, err := c.joinGroup()
+ switch {
+ case err == sarama.ErrUnknownMemberId:
+ c.membershipMu.Lock()
+ c.memberID = ""
+ c.membershipMu.Unlock()
+ return nil, err
+ case err != nil:
+ return nil, err
+ }
+
+ // Sync consumer group state, fetch subscriptions
+ subs, err := c.syncGroup(strategy)
+ switch {
+ case err == sarama.ErrRebalanceInProgress:
+ return nil, err
+ case err != nil:
+ _ = c.leaveGroup()
+ return nil, err
+ }
+ return subs, nil
+}
+
+// Performs the subscription, part of the mainLoop()
+func (c *Consumer) subscribe(tomb *loopTomb, subs map[string][]int32) error {
+ // fetch offsets
+ offsets, err := c.fetchOffsets(subs)
+ if err != nil {
+ _ = c.leaveGroup()
+ return err
+ }
+
+ // create consumers in parallel
+ var mu sync.Mutex
+ var wg sync.WaitGroup
+
+ for topic, partitions := range subs {
+ for _, partition := range partitions {
+ wg.Add(1)
+
+ info := offsets[topic][partition]
+ go func(topic string, partition int32) {
+ if e := c.createConsumer(tomb, topic, partition, info); e != nil {
+ mu.Lock()
+ err = e
+ mu.Unlock()
+ }
+ wg.Done()
+ }(topic, partition)
+ }
+ }
+ wg.Wait()
+
+ if err != nil {
+ _ = c.release()
+ _ = c.leaveGroup()
+ }
+ return err
+}
+
+// --------------------------------------------------------------------
+
+// Send a request to the broker to join group on rebalance()
+func (c *Consumer) joinGroup() (*balancer, error) {
+ memberID, _ := c.membership()
+ req := &sarama.JoinGroupRequest{
+ GroupId: c.groupID,
+ MemberId: memberID,
+ SessionTimeout: int32(c.client.config.Group.Session.Timeout / time.Millisecond),
+ ProtocolType: "consumer",
+ }
+
+ meta := &sarama.ConsumerGroupMemberMetadata{
+ Version: 1,
+ Topics: append(c.coreTopics, c.extraTopics...),
+ UserData: c.client.config.Group.Member.UserData,
+ }
+ err := req.AddGroupProtocolMetadata(string(StrategyRange), meta)
+ if err != nil {
+ return nil, err
+ }
+ err = req.AddGroupProtocolMetadata(string(StrategyRoundRobin), meta)
+ if err != nil {
+ return nil, err
+ }
+
+ broker, err := c.client.Coordinator(c.groupID)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return nil, err
+ }
+
+ resp, err := broker.JoinGroup(req)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return nil, err
+ } else if resp.Err != sarama.ErrNoError {
+ c.closeCoordinator(broker, resp.Err)
+ return nil, resp.Err
+ }
+
+ var strategy *balancer
+ if resp.LeaderId == resp.MemberId {
+ members, err := resp.GetMembers()
+ if err != nil {
+ return nil, err
+ }
+
+ strategy, err = newBalancerFromMeta(c.client, members)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ c.membershipMu.Lock()
+ c.memberID = resp.MemberId
+ c.generationID = resp.GenerationId
+ c.membershipMu.Unlock()
+
+ return strategy, nil
+}
+
+// Send a request to the broker to sync the group on rebalance().
+// Returns a list of topics and partitions to consume.
+func (c *Consumer) syncGroup(strategy *balancer) (map[string][]int32, error) {
+ memberID, generationID := c.membership()
+ req := &sarama.SyncGroupRequest{
+ GroupId: c.groupID,
+ MemberId: memberID,
+ GenerationId: generationID,
+ }
+
+ if strategy != nil {
+ for memberID, topics := range strategy.Perform(c.client.config.Group.PartitionStrategy) {
+ if err := req.AddGroupAssignmentMember(memberID, &sarama.ConsumerGroupMemberAssignment{
+ Topics: topics,
+ }); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ broker, err := c.client.Coordinator(c.groupID)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return nil, err
+ }
+
+ resp, err := broker.SyncGroup(req)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return nil, err
+ } else if resp.Err != sarama.ErrNoError {
+ c.closeCoordinator(broker, resp.Err)
+ return nil, resp.Err
+ }
+
+ // Return if there is nothing to subscribe to
+ if len(resp.MemberAssignment) == 0 {
+ return nil, nil
+ }
+
+ // Get assigned subscriptions
+ members, err := resp.GetMemberAssignment()
+ if err != nil {
+ return nil, err
+ }
+
+ // Sort partitions, for each topic
+ for topic := range members.Topics {
+ sort.Sort(int32Slice(members.Topics[topic]))
+ }
+ return members.Topics, nil
+}
+
+// Fetches latest committed offsets for all subscriptions
+func (c *Consumer) fetchOffsets(subs map[string][]int32) (map[string]map[int32]offsetInfo, error) {
+ offsets := make(map[string]map[int32]offsetInfo, len(subs))
+ req := &sarama.OffsetFetchRequest{
+ Version: 1,
+ ConsumerGroup: c.groupID,
+ }
+
+ for topic, partitions := range subs {
+ offsets[topic] = make(map[int32]offsetInfo, len(partitions))
+ for _, partition := range partitions {
+ offsets[topic][partition] = offsetInfo{Offset: -1}
+ req.AddPartition(topic, partition)
+ }
+ }
+
+ broker, err := c.client.Coordinator(c.groupID)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return nil, err
+ }
+
+ resp, err := broker.FetchOffset(req)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return nil, err
+ }
+
+ for topic, partitions := range subs {
+ for _, partition := range partitions {
+ block := resp.GetBlock(topic, partition)
+ if block == nil {
+ return nil, sarama.ErrIncompleteResponse
+ }
+
+ if block.Err == sarama.ErrNoError {
+ offsets[topic][partition] = offsetInfo{Offset: block.Offset, Metadata: block.Metadata}
+ } else {
+ return nil, block.Err
+ }
+ }
+ }
+ return offsets, nil
+}
+
+// Send a request to the broker to leave the group on failed rebalance() and on Close()
+func (c *Consumer) leaveGroup() error {
+ broker, err := c.client.Coordinator(c.groupID)
+ if err != nil {
+ c.closeCoordinator(broker, err)
+ return err
+ }
+
+ memberID, _ := c.membership()
+ if _, err = broker.LeaveGroup(&sarama.LeaveGroupRequest{
+ GroupId: c.groupID,
+ MemberId: memberID,
+ }); err != nil {
+ c.closeCoordinator(broker, err)
+ }
+ return err
+}
+
+// --------------------------------------------------------------------
+
+func (c *Consumer) createConsumer(tomb *loopTomb, topic string, partition int32, info offsetInfo) error {
+ memberID, _ := c.membership()
+ sarama.Logger.Printf("cluster/consumer %s consume %s/%d from %d\n", memberID, topic, partition, info.NextOffset(c.client.config.Consumer.Offsets.Initial))
+
+ // Create partitionConsumer
+ pc, err := newPartitionConsumer(c.consumer, topic, partition, info, c.client.config.Consumer.Offsets.Initial)
+ if err != nil {
+ return err
+ }
+
+ // Store in subscriptions
+ c.subs.Store(topic, partition, pc)
+
+ // Start partition consumer goroutine
+ tomb.Go(func(stopper <-chan none) {
+ if c.client.config.Group.Mode == ConsumerModePartitions {
+ pc.waitFor(stopper, c.errors)
+ } else {
+ pc.multiplex(stopper, c.messages, c.errors)
+ }
+ })
+
+ if c.client.config.Group.Mode == ConsumerModePartitions {
+ c.partitions <- pc
+ }
+ return nil
+}
+
+func (c *Consumer) commitOffsetsWithRetry(retries int) error {
+ err := c.CommitOffsets()
+ if err != nil && retries > 0 {
+ return c.commitOffsetsWithRetry(retries - 1)
+ }
+ return err
+}
+
+func (c *Consumer) closeCoordinator(broker *sarama.Broker, err error) {
+ if broker != nil {
+ _ = broker.Close()
+ }
+
+ switch err {
+ case sarama.ErrConsumerCoordinatorNotAvailable, sarama.ErrNotCoordinatorForConsumer:
+ _ = c.client.RefreshCoordinator(c.groupID)
+ }
+}
+
+func (c *Consumer) selectExtraTopics(allTopics []string) []string {
+ extra := allTopics[:0]
+ for _, topic := range allTopics {
+ if !c.isKnownCoreTopic(topic) && c.isPotentialExtraTopic(topic) {
+ extra = append(extra, topic)
+ }
+ }
+ return extra
+}
+
+func (c *Consumer) isKnownCoreTopic(topic string) bool {
+ pos := sort.SearchStrings(c.coreTopics, topic)
+ return pos < len(c.coreTopics) && c.coreTopics[pos] == topic
+}
+
+func (c *Consumer) isKnownExtraTopic(topic string) bool {
+ pos := sort.SearchStrings(c.extraTopics, topic)
+ return pos < len(c.extraTopics) && c.extraTopics[pos] == topic
+}
+
+func (c *Consumer) isPotentialExtraTopic(topic string) bool {
+ rx := c.client.config.Group.Topics
+ if rx.Blacklist != nil && rx.Blacklist.MatchString(topic) {
+ return false
+ }
+ if rx.Whitelist != nil && rx.Whitelist.MatchString(topic) {
+ return true
+ }
+ return false
+}
+
+func (c *Consumer) refreshCoordinator() error {
+ if err := c.refreshMetadata(); err != nil {
+ return err
+ }
+ return c.client.RefreshCoordinator(c.groupID)
+}
+
+func (c *Consumer) refreshMetadata() (err error) {
+ if c.client.config.Metadata.Full {
+ err = c.client.RefreshMetadata()
+ } else {
+ var topics []string
+ if topics, err = c.client.Topics(); err == nil && len(topics) != 0 {
+ err = c.client.RefreshMetadata(topics...)
+ }
+ }
+
+ // maybe we didn't have authorization to describe all topics
+ switch err {
+ case sarama.ErrTopicAuthorizationFailed:
+ err = c.client.RefreshMetadata(c.coreTopics...)
+ }
+ return
+}
+
+func (c *Consumer) membership() (memberID string, generationID int32) {
+ c.membershipMu.RLock()
+ memberID, generationID = c.memberID, c.generationID
+ c.membershipMu.RUnlock()
+ return
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/doc.go b/vendor/github.com/bsm/sarama-cluster/doc.go
new file mode 100644
index 00000000..9c8ff16a
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/doc.go
@@ -0,0 +1,8 @@
+/*
+Package cluster provides cluster extensions for Sarama, enabling users
+to consume topics across multiple, balanced nodes.
+
+It requires Kafka v0.9+ and follows the steps described in:
+https://cwiki.apache.org/confluence/display/KAFKA/Kafka+0.9+Consumer+Rewrite+Design
+*/
+package cluster
diff --git a/vendor/github.com/bsm/sarama-cluster/offsets.go b/vendor/github.com/bsm/sarama-cluster/offsets.go
new file mode 100644
index 00000000..4223ac5e
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/offsets.go
@@ -0,0 +1,69 @@
+package cluster
+
+import (
+ "sync"
+
+ "github.com/Shopify/sarama"
+)
+
+// OffsetStash allows offsets to be accumulated and
+// marked as processed in bulk
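+//
+// Illustrative sketch (not part of the vendored code): offsets are stashed
+// while processing and then marked on the consumer in a single call:
+//
+//	stash := NewOffsetStash()
+//	for msg := range consumer.Messages() {
+//		stash.MarkOffset(msg, "")
+//	}
+//	consumer.MarkOffsets(stash)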
+type OffsetStash struct {
+ offsets map[topicPartition]offsetInfo
+ mu sync.Mutex
+}
+
+// NewOffsetStash inits a blank stash
+func NewOffsetStash() *OffsetStash {
+ return &OffsetStash{offsets: make(map[topicPartition]offsetInfo)}
+}
+
+// MarkOffset stashes the provided message offset
+func (s *OffsetStash) MarkOffset(msg *sarama.ConsumerMessage, metadata string) {
+ s.MarkPartitionOffset(msg.Topic, msg.Partition, msg.Offset, metadata)
+}
+
+// MarkPartitionOffset stashes the offset for the provided topic/partition combination
+func (s *OffsetStash) MarkPartitionOffset(topic string, partition int32, offset int64, metadata string) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ key := topicPartition{Topic: topic, Partition: partition}
+ if info := s.offsets[key]; offset >= info.Offset {
+ info.Offset = offset
+ info.Metadata = metadata
+ s.offsets[key] = info
+ }
+}
+
+// ResetPartitionOffset stashes the offset for the provided topic/partition combination.
+// The difference between ResetPartitionOffset and MarkPartitionOffset is that ResetPartitionOffset supports earlier offsets
+func (s *OffsetStash) ResetPartitionOffset(topic string, partition int32, offset int64, metadata string) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ key := topicPartition{Topic: topic, Partition: partition}
+ if info := s.offsets[key]; offset <= info.Offset {
+ info.Offset = offset
+ info.Metadata = metadata
+ s.offsets[key] = info
+ }
+}
+
+// ResetOffset stashes the provided message offset
+// See ResetPartitionOffset for explanation
+func (s *OffsetStash) ResetOffset(msg *sarama.ConsumerMessage, metadata string) {
+ s.ResetPartitionOffset(msg.Topic, msg.Partition, msg.Offset, metadata)
+}
+
+// Offsets returns the latest stashed offsets by topic-partition
+func (s *OffsetStash) Offsets() map[string]int64 {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ res := make(map[string]int64, len(s.offsets))
+ for tp, info := range s.offsets {
+ res[tp.String()] = info.Offset
+ }
+ return res
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/partitions.go b/vendor/github.com/bsm/sarama-cluster/partitions.go
new file mode 100644
index 00000000..bfaa5878
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/partitions.go
@@ -0,0 +1,290 @@
+package cluster
+
+import (
+ "sort"
+ "sync"
+ "time"
+
+ "github.com/Shopify/sarama"
+)
+
+// PartitionConsumer allows code to consume individual partitions from the cluster.
+//
+// See the docs for Consumer.Partitions() for more on how to use this.
+type PartitionConsumer interface {
+ sarama.PartitionConsumer
+
+ // Topic returns the consumed topic name
+ Topic() string
+
+ // Partition returns the consumed partition
+ Partition() int32
+
+ // InitialOffset returns the offset used for creating the PartitionConsumer instance.
+ // The returned offset can be a literal offset, or OffsetNewest, or OffsetOldest
+ InitialOffset() int64
+
+ // MarkOffset marks the offset of a message as processed.
+ MarkOffset(offset int64, metadata string)
+
+ // ResetOffset resets the offset to a previously processed message.
+ ResetOffset(offset int64, metadata string)
+}
+
+type partitionConsumer struct {
+ sarama.PartitionConsumer
+
+ state partitionState
+ mu sync.Mutex
+
+ topic string
+ partition int32
+ initialOffset int64
+
+ closeOnce sync.Once
+ closeErr error
+
+ dying, dead chan none
+}
+
+func newPartitionConsumer(manager sarama.Consumer, topic string, partition int32, info offsetInfo, defaultOffset int64) (*partitionConsumer, error) {
+ offset := info.NextOffset(defaultOffset)
+ pcm, err := manager.ConsumePartition(topic, partition, offset)
+
+ // Resume from default offset, if requested offset is out-of-range
+ if err == sarama.ErrOffsetOutOfRange {
+ info.Offset = -1
+ offset = defaultOffset
+ pcm, err = manager.ConsumePartition(topic, partition, offset)
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ return &partitionConsumer{
+ PartitionConsumer: pcm,
+ state: partitionState{Info: info},
+
+ topic: topic,
+ partition: partition,
+ initialOffset: offset,
+
+ dying: make(chan none),
+ dead: make(chan none),
+ }, nil
+}
+
+// Topic implements PartitionConsumer
+func (c *partitionConsumer) Topic() string { return c.topic }
+
+// Partition implements PartitionConsumer
+func (c *partitionConsumer) Partition() int32 { return c.partition }
+
+// InitialOffset implements PartitionConsumer
+func (c *partitionConsumer) InitialOffset() int64 { return c.initialOffset }
+
+// AsyncClose implements PartitionConsumer
+func (c *partitionConsumer) AsyncClose() {
+ c.closeOnce.Do(func() {
+ c.closeErr = c.PartitionConsumer.Close()
+ close(c.dying)
+ })
+}
+
+// Close implements PartitionConsumer
+func (c *partitionConsumer) Close() error {
+ c.AsyncClose()
+ <-c.dead
+ return c.closeErr
+}
+
+func (c *partitionConsumer) waitFor(stopper <-chan none, errors chan<- error) {
+ defer close(c.dead)
+
+ for {
+ select {
+ case err, ok := <-c.Errors():
+ if !ok {
+ return
+ }
+ select {
+ case errors <- err:
+ case <-stopper:
+ return
+ case <-c.dying:
+ return
+ }
+ case <-stopper:
+ return
+ case <-c.dying:
+ return
+ }
+ }
+}
+
+func (c *partitionConsumer) multiplex(stopper <-chan none, messages chan<- *sarama.ConsumerMessage, errors chan<- error) {
+ defer close(c.dead)
+
+ for {
+ select {
+ case msg, ok := <-c.Messages():
+ if !ok {
+ return
+ }
+ select {
+ case messages <- msg:
+ case <-stopper:
+ return
+ case <-c.dying:
+ return
+ }
+ case err, ok := <-c.Errors():
+ if !ok {
+ return
+ }
+ select {
+ case errors <- err:
+ case <-stopper:
+ return
+ case <-c.dying:
+ return
+ }
+ case <-stopper:
+ return
+ case <-c.dying:
+ return
+ }
+ }
+}
+
+func (c *partitionConsumer) getState() partitionState {
+ c.mu.Lock()
+ state := c.state
+ c.mu.Unlock()
+
+ return state
+}
+
+func (c *partitionConsumer) markCommitted(offset int64) {
+ c.mu.Lock()
+ if offset == c.state.Info.Offset {
+ c.state.Dirty = false
+ }
+ c.mu.Unlock()
+}
+
+// MarkOffset implements PartitionConsumer
+func (c *partitionConsumer) MarkOffset(offset int64, metadata string) {
+ c.mu.Lock()
+ if next := offset + 1; next > c.state.Info.Offset {
+ c.state.Info.Offset = next
+ c.state.Info.Metadata = metadata
+ c.state.Dirty = true
+ }
+ c.mu.Unlock()
+}
+
+// ResetOffset implements PartitionConsumer
+func (c *partitionConsumer) ResetOffset(offset int64, metadata string) {
+ c.mu.Lock()
+ if next := offset + 1; next <= c.state.Info.Offset {
+ c.state.Info.Offset = next
+ c.state.Info.Metadata = metadata
+ c.state.Dirty = true
+ }
+ c.mu.Unlock()
+}
+
+// --------------------------------------------------------------------
+
+type partitionState struct {
+ Info offsetInfo
+ Dirty bool
+ LastCommit time.Time
+}
+
+// --------------------------------------------------------------------
+
+type partitionMap struct {
+ data map[topicPartition]*partitionConsumer
+ mu sync.RWMutex
+}
+
+func newPartitionMap() *partitionMap {
+ return &partitionMap{
+ data: make(map[topicPartition]*partitionConsumer),
+ }
+}
+
+func (m *partitionMap) IsSubscribedTo(topic string) bool {
+ m.mu.RLock()
+ defer m.mu.RUnlock()
+
+ for tp := range m.data {
+ if tp.Topic == topic {
+ return true
+ }
+ }
+ return false
+}
+
+func (m *partitionMap) Fetch(topic string, partition int32) *partitionConsumer {
+ m.mu.RLock()
+ pc := m.data[topicPartition{topic, partition}]
+ m.mu.RUnlock()
+ return pc
+}
+
+func (m *partitionMap) Store(topic string, partition int32, pc *partitionConsumer) {
+ m.mu.Lock()
+ m.data[topicPartition{topic, partition}] = pc
+ m.mu.Unlock()
+}
+
+func (m *partitionMap) Snapshot() map[topicPartition]partitionState {
+ m.mu.RLock()
+ defer m.mu.RUnlock()
+
+ snap := make(map[topicPartition]partitionState, len(m.data))
+ for tp, pc := range m.data {
+ snap[tp] = pc.getState()
+ }
+ return snap
+}
+
+func (m *partitionMap) Stop() {
+ m.mu.RLock()
+ defer m.mu.RUnlock()
+
+ var wg sync.WaitGroup
+ for tp := range m.data {
+ wg.Add(1)
+ go func(p *partitionConsumer) {
+ _ = p.Close()
+ wg.Done()
+ }(m.data[tp])
+ }
+ wg.Wait()
+}
+
+func (m *partitionMap) Clear() {
+ m.mu.Lock()
+ for tp := range m.data {
+ delete(m.data, tp)
+ }
+ m.mu.Unlock()
+}
+
+func (m *partitionMap) Info() map[string][]int32 {
+ info := make(map[string][]int32)
+ m.mu.RLock()
+ for tp := range m.data {
+ info[tp.Topic] = append(info[tp.Topic], tp.Partition)
+ }
+ m.mu.RUnlock()
+
+ for topic := range info {
+ sort.Sort(int32Slice(info[topic]))
+ }
+ return info
+}
diff --git a/vendor/github.com/bsm/sarama-cluster/util.go b/vendor/github.com/bsm/sarama-cluster/util.go
new file mode 100644
index 00000000..e7cb5dd1
--- /dev/null
+++ b/vendor/github.com/bsm/sarama-cluster/util.go
@@ -0,0 +1,75 @@
+package cluster
+
+import (
+ "fmt"
+ "sort"
+ "sync"
+)
+
+type none struct{}
+
+type topicPartition struct {
+ Topic string
+ Partition int32
+}
+
+func (tp *topicPartition) String() string {
+ return fmt.Sprintf("%s-%d", tp.Topic, tp.Partition)
+}
+
+type offsetInfo struct {
+ Offset int64
+ Metadata string
+}
+
+func (i offsetInfo) NextOffset(fallback int64) int64 {
+ if i.Offset > -1 {
+ return i.Offset
+ }
+ return fallback
+}
+
+type int32Slice []int32
+
+func (p int32Slice) Len() int { return len(p) }
+func (p int32Slice) Less(i, j int) bool { return p[i] < p[j] }
+func (p int32Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
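+// Diff returns the elements of p that are not present in o. It assumes o is
+// sorted in ascending order, since sort.Search is used for the lookup.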
+func (p int32Slice) Diff(o int32Slice) (res []int32) {
+ on := len(o)
+ for _, x := range p {
+ n := sort.Search(on, func(i int) bool { return o[i] >= x })
+ if n < on && o[n] == x {
+ continue
+ }
+ res = append(res, x)
+ }
+ return
+}
+
+// --------------------------------------------------------------------
+
+type loopTomb struct {
+ c chan none
+ o sync.Once
+ w sync.WaitGroup
+}
+
+func newLoopTomb() *loopTomb {
+ return &loopTomb{c: make(chan none)}
+}
+
+func (t *loopTomb) stop() { t.o.Do(func() { close(t.c) }) }
+func (t *loopTomb) Close() { t.stop(); t.w.Wait() }
+
+func (t *loopTomb) Dying() <-chan none { return t.c }
+func (t *loopTomb) Go(f func(<-chan none)) {
+ t.w.Add(1)
+
+ go func() {
+ defer t.stop()
+ defer t.w.Done()
+
+ f(t.c)
+ }()
+}
diff --git a/vendor/github.com/burdiyan/kafkautil/LICENSE b/vendor/github.com/burdiyan/kafkautil/LICENSE
new file mode 100644
index 00000000..469bed80
--- /dev/null
+++ b/vendor/github.com/burdiyan/kafkautil/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Alexandr Burdiyan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/burdiyan/kafkautil/README.md b/vendor/github.com/burdiyan/kafkautil/README.md
new file mode 100644
index 00000000..2ac6f171
--- /dev/null
+++ b/vendor/github.com/burdiyan/kafkautil/README.md
@@ -0,0 +1,3 @@
+# kafkautil [![GoDoc](https://godoc.org/github.com/burdiyan/kafkautil?status.svg)](https://godoc.org/github.com/burdiyan/kafkautil)
+
+This repo holds several utilities that can be useful if you use Go and [Goka](https://github.com/lovoo/goka) for stream processing with Apache Kafka on [Confluent Platform](https://confluent.io).
\ No newline at end of file
diff --git a/vendor/github.com/burdiyan/kafkautil/goka_avro.go b/vendor/github.com/burdiyan/kafkautil/goka_avro.go
new file mode 100644
index 00000000..3b352c4e
--- /dev/null
+++ b/vendor/github.com/burdiyan/kafkautil/goka_avro.go
@@ -0,0 +1,138 @@
+package kafkautil
+
+import (
+ "bytes"
+ "encoding/binary"
+ "io"
+ "sync"
+
+ "github.com/avast/retry-go"
+ "github.com/lovoo/goka"
+ "github.com/pkg/errors"
+)
+
+const magicByte byte = 0
+
+type avroRecord interface {
+ Serialize(io.Writer) error
+ Schema() string
+}
+
+type schemaRegisterer interface {
+ RegisterNewSchema(subject string, schema string) (int, error)
+}
+
+// CodecWrapper wraps Avro goka.Codec to be compatible with
+// Confluent Schema registry wire format.
+type CodecWrapper interface {
+ WrapCodec(c goka.Codec, subject string) goka.Codec
+}
+
+type codecWrapper struct {
+ rc schemaRegisterer
+}
+
+// WrapCodec implements CodecWrapper.
+func (cw *codecWrapper) WrapCodec(c goka.Codec, subject string) goka.Codec {
+ return &avroCodec{
+ subject: subject,
+ codec: c,
+ client: cw.rc,
+ schemaCache: make(map[string]int32),
+ }
+}
+
+// NewCodecWrapper creates new CodecWrapper using provided Schema Registry client.
+func NewCodecWrapper(rc schemaRegisterer) CodecWrapper {
+ return &codecWrapper{rc: rc}
+}
+
+type avroCodec struct {
+ subject string
+ codec goka.Codec
+ client schemaRegisterer
+
+ mu sync.RWMutex
+ schemaCache map[string]int32
+}
+
+func (c *avroCodec) getSchema(schema string) (int32, bool) {
+ c.mu.RLock()
+ s, ok := c.schemaCache[schema]
+ c.mu.RUnlock()
+ return s, ok
+}
+
+func (c *avroCodec) saveSchema(schema string, id int32) {
+ c.mu.Lock()
+ c.schemaCache[schema] = id
+ c.mu.Unlock()
+}
+
+// Encode implements goka.Codec and encodes the value to Avro
+// using the Confluent Schema Registry wire format. It will register the
+// schema in the Schema Registry and cache the registered ID.
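+//
+// The produced wire format is a single zero magic byte, followed by a 4-byte
+// big-endian schema ID, followed by the Avro-serialized payload.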
+func (c *avroCodec) Encode(value interface{}) ([]byte, error) {
+ v, ok := value.(avroRecord)
+ if !ok {
+ return nil, errors.Errorf("%T must implement avroRecord interface", value)
+ }
+
+ // If the schema is not cached, try to register it in the Schema Registry, retrying on failure.
+ id, ok := c.getSchema(v.Schema())
+ if !ok {
+ if err := retry.Do(func() error {
+ schemaID, err := c.client.RegisterNewSchema(c.subject, v.Schema())
+ if err != nil {
+ return err
+ }
+ id = int32(schemaID)
+ c.saveSchema(v.Schema(), id)
+ return nil
+ }, retry.Attempts(5)); err != nil {
+ return nil, err
+ }
+ }
+
+ var b bytes.Buffer
+ b.WriteByte(magicByte)
+
+ idBytes := make([]byte, 4)
+ binary.BigEndian.PutUint32(idBytes, uint32(id))
+ b.Write(idBytes)
+
+ // We don't invoke the underlying codec here, to avoid allocating another buffer;
+ // Avro types know how to serialize themselves, and the codec exists only to
+ // comply with the interface required by goka.
+ if err := v.Serialize(&b); err != nil {
+ return nil, err
+ }
+
+ return b.Bytes(), nil
+}
+
+// Decode extracts Avro payload from Confluent Schema Registry wire format,
+// and decodes it using underlying goka.Codec for this specific type.
+func (c *avroCodec) Decode(data []byte) (interface{}, error) {
+ if err := validateAvro(data); err != nil {
+ return nil, err
+ }
+
+ return c.codec.Decode(data[5:])
+}
+
+func validateAvro(b []byte) error {
+ if len(b) == 0 {
+ return errors.New("avro: payload is empty")
+ }
+
+ if b[0] != 0 {
+ return errors.Errorf("avro: wrong magic byte for confluent avro encoding: %v", b[0])
+ }
+
+ // A message encoded with the Confluent Avro encoding cannot be shorter than 5 bytes,
+ // because the first byte is the magic byte and the next 4 bytes are a mandatory schema ID.
+ if len(b) < 5 {
+ return errors.New("avro: payload is less than 5 bytes")
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/burdiyan/kafkautil/partitioner.go b/vendor/github.com/burdiyan/kafkautil/partitioner.go
new file mode 100644
index 00000000..b829a8c9
--- /dev/null
+++ b/vendor/github.com/burdiyan/kafkautil/partitioner.go
@@ -0,0 +1,98 @@
+package kafkautil
+
+import (
+ "hash"
+
+ "github.com/Shopify/sarama"
+)
+
+// NewJVMCompatiblePartitioner creates a Sarama partitioner that uses
+// the same hashing algorithm as JVM Kafka clients.
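+//
+// Illustrative sketch (not part of the vendored code; the topic and key are
+// placeholders and sarama's Partitioner interface is assumed):
+//
+//	p := NewJVMCompatiblePartitioner("events")
+//	msg := &sarama.ProducerMessage{Topic: "events", Key: sarama.StringEncoder("user-42")}
+//	partition, err := p.Partition(msg, 12) // same choice a JVM producer would make for 12 partitions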
+func NewJVMCompatiblePartitioner(topic string) sarama.Partitioner {
+ return sarama.NewCustomHashPartitioner(MurmurHasher)(topic)
+}
+
+// murmurHash implements the hash.Hash32 interface, solely to conform to the
+// hasher required by Sarama. It does not support streaming, since Sarama does
+// not require it.
+type murmurHash struct {
+ v int32
+}
+
+// MurmurHasher creates a murmur2 hasher implementing the hash.Hash32 interface.
+// The implementation is not complete and does not support streaming; it only
+// implements the interface to comply with the sarama.NewCustomHashPartitioner
+// signature. Sarama only calls Write once per message, so streaming support
+// is not necessary.
+func MurmurHasher() hash.Hash32 {
+ return new(murmurHash)
+}
+
+func (m *murmurHash) Write(d []byte) (n int, err error) {
+ n = len(d)
+ m.v = murmur2(d)
+ return
+}
+
+func (m *murmurHash) Reset() {
+ m.v = 0
+}
+
+func (m *murmurHash) Size() int { return 32 }
+
+func (m *murmurHash) BlockSize() int { return 4 }
+
+// Sum is a no-op and returns in unchanged.
+func (m *murmurHash) Sum(in []byte) []byte {
+ return in
+}
+
+func (m *murmurHash) Sum32() uint32 {
+ return uint32(toPositive(m.v))
+}
+
+// murmur2 implements hashing algorithm used by JVM clients for Kafka.
+// See the original implementation: https://github.com/apache/kafka/blob/1.0.0/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L353
+func murmur2(data []byte) int32 {
+ length := int32(len(data))
+ seed := uint32(0x9747b28c)
+ m := int32(0x5bd1e995)
+ r := uint32(24)
+
+ h := int32(seed ^ uint32(length))
+ length4 := length / 4
+
+ for i := int32(0); i < length4; i++ {
+ i4 := i * 4
+ k := int32(data[i4+0]&0xff) + (int32(data[i4+1]&0xff) << 8) + (int32(data[i4+2]&0xff) << 16) + (int32(data[i4+3]&0xff) << 24)
+ k *= m
+ k ^= int32(uint32(k) >> r)
+ k *= m
+ h *= m
+ h ^= k
+ }
+
+ switch length % 4 {
+ case 3:
+ h ^= int32(data[(length & ^3)+2]&0xff) << 16
+ fallthrough
+ case 2:
+ h ^= int32(data[(length & ^3)+1]&0xff) << 8
+ fallthrough
+ case 1:
+ h ^= int32(data[length & ^3] & 0xff)
+ h *= m
+ }
+
+ h ^= int32(uint32(h) >> 13)
+ h *= m
+ h ^= int32(uint32(h) >> 15)
+
+ return h
+}
+
+// toPositive converts i to positive number as per the original implementation in the JVM clients for Kafka.
+// See the original implementation: https://github.com/apache/kafka/blob/1.0.0/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L741
+func toPositive(i int32) int32 {
+ return i & 0x7fffffff
+}
diff --git a/vendor/github.com/lovoo/goka/.gitignore b/vendor/github.com/lovoo/goka/.gitignore
new file mode 100644
index 00000000..ae971837
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/.gitignore
@@ -0,0 +1,3 @@
+tmp*
+*.*~
+.tags*
diff --git a/vendor/github.com/lovoo/goka/.travis.yml b/vendor/github.com/lovoo/goka/.travis.yml
new file mode 100644
index 00000000..3fb56c5f
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/.travis.yml
@@ -0,0 +1,4 @@
+language: go
+
+go:
+ - 1.9
diff --git a/vendor/github.com/lovoo/goka/LICENSE b/vendor/github.com/lovoo/goka/LICENSE
new file mode 100644
index 00000000..a14596cf
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2017 LOVOO GmbH
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/lovoo/goka/README.md b/vendor/github.com/lovoo/goka/README.md
new file mode 100644
index 00000000..80ef6649
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/README.md
@@ -0,0 +1,163 @@
+# Goka [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) [![Build Status](https://travis-ci.org/lovoo/goka.svg?branch=master)](https://travis-ci.org/lovoo/goka) [![GoDoc](https://godoc.org/github.com/lovoo/goka?status.svg)](https://godoc.org/github.com/lovoo/goka)
+
+
+Goka is a compact yet powerful distributed stream processing library for [Apache Kafka] written in Go. Goka aims to reduce the complexity of building highly scalable and highly available microservices.
+
+Goka extends the concept of Kafka consumer groups by binding a state table to them and persisting them in Kafka. Goka provides sane defaults and a pluggable architecture.
+
+## Features
+ * **Message Input and Output**
+
+ Goka handles all the message input and output for you. You only have to provide one or more callback functions that handle messages from any of the Kafka topics you are interested in. You only ever have to deal with deserialized messages.
+
+ * **Scaling**
+
+ Goka automatically distributes the processing and state across multiple instances of a service. This enables effortless scaling when the load increases.
+
+ * **Fault Tolerance**
+
+ In case of a failure, Goka will redistribute the failed instance's workload and state across the remaining healthy instances. All state is safely stored in Kafka and messages delivered with *at-least-once* semantics.
+
+ * **Built-in Monitoring and Introspection**
+
+ Goka provides a web interface for monitoring performance and querying values in the state.
+
+ * **Modularity**
+
+ Goka fosters a pluggable architecture which enables you to replace for example the storage layer or the Kafka communication layer.
+
+## Documentation
+
+This README provides a brief, high level overview of the ideas behind Goka.
+A more detailed introduction of the project can be found in this [blog post](https://tech.lovoo.com/2017/05/23/goka/).
+
+Package API documentation is available at [GoDoc] and the [Wiki](https://github.com/lovoo/goka/wiki/Tips#configuring-log-compaction-for-table-topics) provides several tips for configuring, extending, and deploying Goka applications.
+
+## Installation
+
+You can install Goka by running the following command:
+
+``$ go get -u github.com/lovoo/goka``
+
+## Concepts
+
+Goka relies on Kafka for message passing, fault-tolerant state storage and workload partitioning.
+
+* **Emitters** deliver key-value messages into Kafka. As an example, an emitter could be a database handler emitting the state changes into Kafka for other interested applications to consume.
+
+* **Processor** is a set of callback functions that consume and perform state transformations upon delivery of these emitted messages. *Processor groups* are formed of one or more instances of a processor. Goka distributes the partitions of the input topics across all processor instances in a processor group. This enables effortless scaling and fault-tolerance. If a processor instance fails, its partitions and state are reassigned to the remaining healthy members of the processor group. Processors can also emit further messages into Kafka.
+
+* **Group table** is the state of a processor group. It is a partitioned key-value table stored in Kafka that belongs to a single processor group. If a processor instance fails, the remaining instances will take over the group table partitions of the failed instance recovering them from Kafka.
+
+* **Views** are local caches of a complete group table. Views provide read-only access to the group tables and can be used to provide external services for example through a gRPC interface.
+
+* **Local storage** keeps a local copy of the group table partitions to speed up recovery and reduce memory utilization. By default, the local storage uses [LevelDB](https://github.com/syndtr/goleveldb), but an in-memory map and [Redis-based storage](https://github.com/lovoo/goka/tree/master/storage/redis) are also available.
+
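+The following is a minimal sketch of how a view could be used to query the group table of the Get Started example below from another service. It assumes the usual view API (`goka.NewView` with a `(brokers, table, codec)` signature, `View.Run` and `View.Get`); the broker address and the key are illustrative only.
+
+```go
+package main
+
+import (
+ "context"
+ "fmt"
+ "log"
+
+ "github.com/lovoo/goka"
+ "github.com/lovoo/goka/codec"
+)
+
+func main() {
+ brokers := []string{"localhost:9092"}
+ group := goka.Group("example-group")
+
+ // A view subscribes to all partitions of the group table
+ // ("example-group-table") and keeps a local, read-only copy of it.
+ view, err := goka.NewView(brokers, goka.GroupTable(group), new(codec.Int64))
+ if err != nil {
+ log.Fatalf("error creating view: %v", err)
+ }
+
+ // Run recovers the table from Kafka and keeps it up to date until the
+ // context is cancelled.
+ go func() {
+ if err := view.Run(context.Background()); err != nil {
+ log.Fatalf("error running view: %v", err)
+ }
+ }()
+
+ // Once the view has caught up, values can be queried by key. A real
+ // service would wait for recovery (or handle errors) before querying.
+ val, err := view.Get("some-key")
+ if err != nil {
+ log.Fatalf("error getting value: %v", err)
+ }
+ fmt.Printf("some-key has counter value %v\n", val)
+}
+```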
+
+## Get Started
+
+An example Goka application could look like the following.
+An emitter emits a single message with key "some-key" and value "some-value" into the "example-stream" topic.
+A processor processes the "example-stream" topic counting the number of messages delivered for "some-key".
+The counter is persisted in the "example-group-table" topic.
+To locally start dockerized Zookeeper and Kafka instances, execute `make start` with the `Makefile` in the [examples] folder.
+
+```go
+package main
+
+import (
+ "context"
+ "fmt"
+ "log"
+ "os"
+ "os/signal"
+ "syscall"
+
+ "github.com/lovoo/goka"
+ "github.com/lovoo/goka/codec"
+)
+
+var (
+ brokers = []string{"localhost:9092"}
+ topic goka.Stream = "example-stream"
+ group goka.Group = "example-group"
+)
+
+// emits a single message and leaves
+func runEmitter() {
+ emitter, err := goka.NewEmitter(brokers, topic, new(codec.String))
+ if err != nil {
+ log.Fatalf("error creating emitter: %v", err)
+ }
+ defer emitter.Finish()
+ err = emitter.EmitSync("some-key", "some-value")
+ if err != nil {
+ log.Fatalf("error emitting message: %v", err)
+ }
+ fmt.Println("message emitted")
+}
+
+// process messages until ctrl-c is pressed
+func runProcessor() {
+ // process callback is invoked for each message delivered from
+ // "example-stream" topic.
+ cb := func(ctx goka.Context, msg interface{}) {
+ var counter int64
+ // ctx.Value() returns the value that is stored in the group table for
+ // the message's key (nil if no value has been stored yet).
+ if val := ctx.Value(); val != nil {
+ counter = val.(int64)
+ }
+ counter++
+ // SetValue stores the incremented counter in the group table under
+ // the message's key.
+ ctx.SetValue(counter)
+ log.Printf("key = %s, counter = %v, msg = %v", ctx.Key(), counter, msg)
+ }
+
+ // Define a new processor group. The group defines all inputs, outputs, and
+ // serialization formats. The group-table topic is "example-group-table".
+ g := goka.DefineGroup(group,
+ goka.Input(topic, new(codec.String), cb),
+ goka.Persist(new(codec.Int64)),
+ )
+
+ p, err := goka.NewProcessor(brokers, g)
+ if err != nil {
+ log.Fatalf("error creating processor: %v", err)
+ }
+ ctx, cancel := context.WithCancel(context.Background())
+ done := make(chan bool)
+ go func() {
+ defer close(done)
+ if err = p.Run(ctx); err != nil {
+ log.Fatalf("error running processor: %v", err)
+ }
+ }()
+
+ wait := make(chan os.Signal, 1)
+ signal.Notify(wait, syscall.SIGINT, syscall.SIGTERM)
+ <-wait // wait for SIGINT/SIGTERM
+ cancel() // gracefully stop processor
+ <-done
+}
+
+func main() {
+ runEmitter() // emits one message and stops
+ runProcessor() // press ctrl-c to stop
+}
+```
+
+Note that tables have to be configured in Kafka with log compaction.
+For details check the [Wiki](https://github.com/lovoo/goka/wiki/Tips#configuring-log-compaction-for-table-topics).
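+
+Alternatively, the ZooKeeper-based `TopicManager` in the `github.com/lovoo/goka/kafka` package can create the table topic with `cleanup.policy=compact` up front. The sketch below is only an illustration: the ZooKeeper address, partition count, and replication factor are assumptions for a local single-broker setup.
+
+```go
+package main
+
+import (
+ "log"
+
+ "github.com/lovoo/goka/kafka"
+)
+
+func main() {
+ // ZooKeeper servers of the Kafka cluster (assumed local setup).
+ servers := []string{"localhost:2181"}
+
+ cfg := kafka.NewTopicManagerConfig()
+ cfg.Table.Replication = 1 // single-broker development setup
+
+ tm, err := kafka.NewTopicManager(servers, cfg)
+ if err != nil {
+ log.Fatalf("error creating topic manager: %v", err)
+ }
+ defer tm.Close()
+
+ // Ensure the group table topic exists as a log-compacted topic with
+ // 10 partitions; the partition count must match the input topics.
+ if err := tm.EnsureTableExists("example-group-table", 10); err != nil {
+ log.Fatalf("error ensuring table topic: %v", err)
+ }
+}
+```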
+
+## How to contribute
+
+Contributions are always welcome.
+Please fork the repo, create a pull request against master, and be sure tests pass.
+See the [GitHub Flow] for details.
+
+[Apache Kafka]: https://kafka.apache.org/
+[GoDoc]: https://godoc.org/github.com/lovoo/goka
+[examples]: https://github.com/lovoo/goka/tree/master/examples
+[GitHub Flow]: https://guides.github.com/introduction/flow
diff --git a/vendor/github.com/lovoo/goka/codec.go b/vendor/github.com/lovoo/goka/codec.go
new file mode 100644
index 00000000..832c20be
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/codec.go
@@ -0,0 +1,7 @@
+package goka
+
+// Codec decodes and encodes from and to []byte
+type Codec interface {
+ Encode(value interface{}) (data []byte, err error)
+ Decode(data []byte) (value interface{}, err error)
+}
diff --git a/vendor/github.com/lovoo/goka/context.go b/vendor/github.com/lovoo/goka/context.go
new file mode 100644
index 00000000..d8917488
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/context.go
@@ -0,0 +1,365 @@
+package goka
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "sync"
+ "time"
+
+ "github.com/lovoo/goka/kafka"
+ "github.com/lovoo/goka/multierr"
+ "github.com/lovoo/goka/storage"
+)
+
+// Context provides access to the processor's table and emit capabilities to
+// arbitrary topics in kafka.
+// Upon arrival of a message from subscribed topics, the respective
+// ProcessCallback is invoked with a context object along
+// with the input message.
+type Context interface {
+ // Topic returns the topic of input message.
+ Topic() Stream
+
+ // Key returns the key of the input message.
+ Key() string
+
+ // Partition returns the partition of the input message.
+ Partition() int32
+
+ // Offset returns the offset of the input message.
+ Offset() int64
+
+ // Value returns the value of the key in the group table.
+ Value() interface{}
+
+ // Headers returns the headers of the input message
+ Headers() map[string][]byte
+
+ // SetValue updates the value of the key in the group table.
+ SetValue(value interface{})
+
+ // Delete deletes a value from the group table. IMPORTANT: this deletes the
+ // value associated with the key from both the local cache and the persisted
+ // table in Kafka.
+ Delete()
+
+ // Timestamp returns the timestamp of the input message. If the timestamp is
+ // invalid, a zero time will be returned.
+ Timestamp() time.Time
+
+ // Join returns the value of key in the copartitioned table.
+ Join(topic Table) interface{}
+
+ // Lookup returns the value of key in the view of table.
+ Lookup(topic Table, key string) interface{}
+
+ // Emit asynchronously writes a message into a topic.
+ Emit(topic Stream, key string, value interface{})
+
+ // Loopback asynchronously sends a message to another key of the group
+ // table. Value passed to loopback is encoded via the codec given in the
+ // Loop subscription.
+ Loopback(key string, value interface{})
+
+ // Fail stops execution and shuts down the processor
+ Fail(err error)
+
+ // Context returns the underlying context used to start the processor or a
+ // subcontext.
+ Context() context.Context
+}
+
+type emitter func(topic string, key string, value []byte) *kafka.Promise
+
+type cbContext struct {
+ ctx context.Context
+ graph *GroupGraph
+
+ commit func()
+ emitter emitter
+ failer func(err error)
+
+ storage storage.Storage
+ pviews map[string]*partition
+ views map[string]*View
+
+ pstats *PartitionStats
+
+ msg *message
+ done bool
+ counters struct {
+ emits int
+ dones int
+ stores int
+ }
+ errors multierr.Errors
+ m sync.Mutex
+ wg *sync.WaitGroup
+}
+
+// Emit sends a message asynchronously to a topic.
+func (ctx *cbContext) Emit(topic Stream, key string, value interface{}) {
+ if topic == "" {
+ ctx.Fail(errors.New("cannot emit to empty topic"))
+ }
+ if loopName(ctx.graph.Group()) == string(topic) {
+ ctx.Fail(errors.New("cannot emit to loop topic (use Loopback instead)"))
+ }
+ if tableName(ctx.graph.Group()) == string(topic) {
+ ctx.Fail(errors.New("cannot emit to table topic (use SetValue instead)"))
+ }
+ c := ctx.graph.codec(string(topic))
+ if c == nil {
+ ctx.Fail(fmt.Errorf("no codec for topic %s", topic))
+ }
+
+ var data []byte
+ if value != nil {
+ var err error
+ data, err = c.Encode(value)
+ if err != nil {
+ ctx.Fail(fmt.Errorf("error encoding message for topic %s: %v", topic, err))
+ }
+ }
+
+ ctx.emit(string(topic), key, data)
+}
+
+// Loopback sends a message to another key of the processor.
+func (ctx *cbContext) Loopback(key string, value interface{}) {
+ l := ctx.graph.LoopStream()
+ if l == nil {
+ ctx.Fail(errors.New("no loop topic configured"))
+ }
+
+ data, err := l.Codec().Encode(value)
+ if err != nil {
+ ctx.Fail(fmt.Errorf("error encoding message for key %s: %v", key, err))
+ }
+
+ ctx.emit(l.Topic(), key, data)
+}
+
+func (ctx *cbContext) emit(topic string, key string, value []byte) {
+ ctx.counters.emits++
+ ctx.emitter(topic, key, value).Then(func(err error) {
+ if err != nil {
+ err = fmt.Errorf("error emitting to %s: %v", topic, err)
+ }
+ ctx.emitDone(err)
+ })
+
+ s := ctx.pstats.Output[topic]
+ s.Count++
+ s.Bytes += len(value)
+ ctx.pstats.Output[topic] = s
+}
+
+func (ctx *cbContext) Delete() {
+ if err := ctx.deleteKey(ctx.Key()); err != nil {
+ ctx.Fail(err)
+ }
+}
+
+// Value returns the value of the key in the group table.
+func (ctx *cbContext) Value() interface{} {
+ val, err := ctx.valueForKey(ctx.msg.Key)
+ if err != nil {
+ ctx.Fail(err)
+ }
+ return val
+}
+
+// SetValue updates the value of the key in the group table.
+func (ctx *cbContext) SetValue(value interface{}) {
+ if err := ctx.setValueForKey(ctx.msg.Key, value); err != nil {
+ ctx.Fail(err)
+ }
+}
+
+// Timestamp returns the timestamp of the input message.
+func (ctx *cbContext) Timestamp() time.Time {
+ return ctx.msg.Timestamp
+}
+
+func (ctx *cbContext) Key() string {
+ return ctx.msg.Key
+}
+
+func (ctx *cbContext) Topic() Stream {
+ return Stream(ctx.msg.Topic)
+}
+
+func (ctx *cbContext) Offset() int64 {
+ return ctx.msg.Offset
+}
+
+func (ctx *cbContext) Partition() int32 {
+ return ctx.msg.Partition
+}
+
+func (ctx *cbContext) Headers() map[string][]byte {
+ return ctx.msg.Header
+}
+
+func (ctx *cbContext) Join(topic Table) interface{} {
+ if ctx.pviews == nil {
+ ctx.Fail(fmt.Errorf("table %s not subscribed", topic))
+ }
+ v, ok := ctx.pviews[string(topic)]
+ if !ok {
+ ctx.Fail(fmt.Errorf("table %s not subscribed", topic))
+ }
+ data, err := v.st.Get(ctx.Key())
+ if err != nil {
+ ctx.Fail(fmt.Errorf("error getting key %s of table %s: %v", ctx.Key(), topic, err))
+ } else if data == nil {
+ return nil
+ }
+
+ value, err := ctx.graph.codec(string(topic)).Decode(data)
+ if err != nil {
+ ctx.Fail(fmt.Errorf("error decoding value key %s of table %s: %v", ctx.Key(), topic, err))
+ }
+ return value
+}
+
+func (ctx *cbContext) Lookup(topic Table, key string) interface{} {
+ if ctx.views == nil {
+ ctx.Fail(fmt.Errorf("topic %s not subscribed", topic))
+ }
+ v, ok := ctx.views[string(topic)]
+ if !ok {
+ ctx.Fail(fmt.Errorf("topic %s not subscribed", topic))
+ }
+ val, err := v.Get(key)
+ if err != nil {
+ ctx.Fail(fmt.Errorf("error getting key %s of table %s: %v", key, topic, err))
+ }
+ return val
+}
+
+// valueForKey returns the value of key in the processor state.
+func (ctx *cbContext) valueForKey(key string) (interface{}, error) {
+ if ctx.storage == nil {
+ return nil, fmt.Errorf("Cannot access state in stateless processor")
+ }
+
+ data, err := ctx.storage.Get(key)
+ if err != nil {
+ return nil, fmt.Errorf("error reading value: %v", err)
+ } else if data == nil {
+ return nil, nil
+ }
+
+ value, err := ctx.graph.GroupTable().Codec().Decode(data)
+ if err != nil {
+ return nil, fmt.Errorf("error decoding value: %v", err)
+ }
+ return value, nil
+}
+
+func (ctx *cbContext) deleteKey(key string) error {
+ if ctx.graph.GroupTable() == nil {
+ return fmt.Errorf("Cannot access state in stateless processor")
+ }
+
+ ctx.counters.stores++
+ if err := ctx.storage.Delete(key); err != nil {
+ return fmt.Errorf("error deleting key (%s) from storage: %v", key, err)
+ }
+
+ ctx.counters.emits++
+ ctx.emitter(ctx.graph.GroupTable().Topic(), key, nil).Then(func(err error) {
+ ctx.emitDone(err)
+ })
+
+ return nil
+}
+
+// setValueForKey sets a value for a key in the processor state.
+func (ctx *cbContext) setValueForKey(key string, value interface{}) error {
+ if ctx.graph.GroupTable() == nil {
+ return fmt.Errorf("Cannot access state in stateless processor")
+ }
+
+ if value == nil {
+ return fmt.Errorf("cannot set nil as value")
+ }
+
+ encodedValue, err := ctx.graph.GroupTable().Codec().Encode(value)
+ if err != nil {
+ return fmt.Errorf("error encoding value: %v", err)
+ }
+
+ ctx.counters.stores++
+ if err = ctx.storage.Set(key, encodedValue); err != nil {
+ return fmt.Errorf("error storing value: %v", err)
+ }
+
+ table := ctx.graph.GroupTable().Topic()
+ ctx.counters.emits++
+ ctx.emitter(table, key, encodedValue).Then(func(err error) {
+ ctx.emitDone(err)
+ })
+
+ s := ctx.pstats.Output[table]
+ s.Count++
+ s.Bytes += len(encodedValue)
+ ctx.pstats.Output[table] = s
+
+ return nil
+}
+
+func (ctx *cbContext) emitDone(err error) {
+ ctx.m.Lock()
+ defer ctx.m.Unlock()
+ ctx.counters.dones++
+ ctx.tryCommit(err)
+}
+
+// called after all emits
+func (ctx *cbContext) finish(err error) {
+ ctx.m.Lock()
+ defer ctx.m.Unlock()
+ ctx.done = true
+ ctx.tryCommit(err)
+}
+
+// called before any emit
+func (ctx *cbContext) start() {
+ ctx.wg.Add(1)
+}
+
+// calls ctx.commit once all emits have successfully finished, or fails context
+// if some emit failed.
+func (ctx *cbContext) tryCommit(err error) {
+ if err != nil {
+ _ = ctx.errors.Collect(err)
+ }
+
+ // not all calls are done yet, do not send the ack upstream.
+ if !ctx.done || ctx.counters.emits > ctx.counters.dones {
+ return
+ }
+
+ // commit if no errors, otherwise fail context
+ if ctx.errors.HasErrors() {
+ ctx.failer(ctx.errors.NilOrError())
+ } else {
+ ctx.commit()
+ }
+
+ // no further callback will be called from this context
+ ctx.wg.Done()
+}
+
+// Fail stops execution and shuts down the processor
+func (ctx *cbContext) Fail(err error) {
+ panic(err)
+}
+
+func (ctx *cbContext) Context() context.Context {
+ return ctx.ctx
+}
diff --git a/vendor/github.com/lovoo/goka/doc.go b/vendor/github.com/lovoo/goka/doc.go
new file mode 100644
index 00000000..3a9f8360
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/doc.go
@@ -0,0 +1,38 @@
+//go:generate go-bindata -pkg templates -o web/templates/bindata.go web/templates/common/ web/templates/monitor/ web/templates/query/ web/templates/index
+//go:generate mockgen -package mock -destination mock/storage.go github.com/lovoo/goka/storage Storage
+//go:generate mockgen -package mock -destination mock/proxy.go -aux_files storage=storage/storage.go -source partition.go kafkaProxy
+//go:generate mockgen -package mock -destination mock/kafka.go github.com/lovoo/goka/kafka Consumer,TopicManager,Producer
+
+/*
+Package goka is a stateful stream processing library for Apache Kafka (version 0.9+) that eases
+the development of microservices.
+Goka extends the concept of consumer group with a group table, which represents the state of the
+group.
+A microservice modifies and serves the content of a table employing two complementary object types:
+processors and views.
+
+Processors
+
+A processor is a set of callback functions that modify the group table when messages arrive and may
+also emit messages into other topics.
+Messages as well as rows in the group table are key-value pairs.
+Callbacks receive the arriving message and the row addressed by the message's key.
+
+In Kafka, keys are used to partition topics.
+A goka processor consumes from a set of co-partitioned topics (topics with the same number of
+partitions and the same key range).
+A group topic keeps track of the group table updates, allowing for recovery and rebalancing of
+processors:
+When multiple processor instances start in the same consumer group, the instances split the
+co-partitioned input topics and load the respective group table partitions from the group topic.
+A local disk storage minimizes recovery time by caching partitions of group table.
+
+Views
+
+A view is a materialized (ie, persistent) cache of a group table.
+A view subscribes for the updates of all partitions of a group table and keeps local disk storage
+in sync with the group topic.
+With a view, one can easily serve up-to-date content of the group table via, for example, gRPC.
+
+*/
+package goka
diff --git a/vendor/github.com/lovoo/goka/emitter.go b/vendor/github.com/lovoo/goka/emitter.go
new file mode 100644
index 00000000..4485b53f
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/emitter.go
@@ -0,0 +1,93 @@
+package goka
+
+import (
+ "fmt"
+ "sync"
+
+ "github.com/lovoo/goka/kafka"
+)
+
+// Emitter emits messages into a specific Kafka topic, first encoding the message with the given codec.
+type Emitter struct {
+ codec Codec
+ producer kafka.Producer
+
+ topic string
+
+ wg sync.WaitGroup
+}
+
+// NewEmitter creates a new emitter for the given brokers and topic, using the passed codec and optional EmitterOptions.
+func NewEmitter(brokers []string, topic Stream, codec Codec, options ...EmitterOption) (*Emitter, error) {
+ options = append(
+ // default options come first
+ []EmitterOption{},
+
+ // user-defined options (may overwrite default ones)
+ options...,
+ )
+
+ opts := new(eoptions)
+
+ err := opts.applyOptions(topic, codec, options...)
+ if err != nil {
+ return nil, fmt.Errorf(errApplyOptions, err)
+ }
+
+ prod, err := opts.builders.producer(brokers, opts.clientID, opts.hasher)
+ if err != nil {
+ return nil, fmt.Errorf(errBuildProducer, err)
+ }
+
+ return &Emitter{
+ codec: codec,
+ producer: prod,
+ topic: string(topic),
+ }, nil
+}
+
+// Emit sends a message for the passed key using the emitter's codec.
+func (e *Emitter) Emit(key string, msg interface{}) (*kafka.Promise, error) {
+ var (
+ err error
+ data []byte
+ )
+
+ if msg != nil {
+ data, err = e.codec.Encode(msg)
+ if err != nil {
+ return nil, fmt.Errorf("Error encoding value for key %s in topic %s: %v", key, e.topic, err)
+ }
+ }
+ e.wg.Add(1)
+ return e.producer.Emit(e.topic, key, data).Then(func(err error) {
+ e.wg.Done()
+ }), nil
+}
+
+// EmitSync sends a message for the passed key and waits until it has been delivered.
+func (e *Emitter) EmitSync(key string, msg interface{}) error {
+ var (
+ err error
+ promise *kafka.Promise
+ )
+ promise, err = e.Emit(key, msg)
+
+ if err != nil {
+ return err
+ }
+
+ done := make(chan struct{})
+ promise.Then(func(asyncErr error) {
+ err = asyncErr
+ close(done)
+ })
+ <-done
+ return err
+}
+
+// Finish waits until the emitter is finished producing all pending messages.
+func (e *Emitter) Finish() error {
+ e.wg.Wait()
+ return e.producer.Close()
+}
diff --git a/vendor/github.com/lovoo/goka/errors.go b/vendor/github.com/lovoo/goka/errors.go
new file mode 100644
index 00000000..5f0316fa
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/errors.go
@@ -0,0 +1,7 @@
+package goka
+
+var (
+ errBuildConsumer = "error creating Kafka consumer: %v"
+ errBuildProducer = "error creating Kafka producer: %v"
+ errApplyOptions = "error applying options: %v"
+)
diff --git a/vendor/github.com/lovoo/goka/graph.go b/vendor/github.com/lovoo/goka/graph.go
new file mode 100644
index 00000000..1ec5ad49
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/graph.go
@@ -0,0 +1,375 @@
+package goka
+
+import (
+ "errors"
+ "fmt"
+ "strings"
+)
+
+var (
+ tableSuffix = "-table"
+ loopSuffix = "-loop"
+)
+
+// Stream is the name of an event stream topic in Kafka, ie, a topic with
+// cleanup.policy=delete
+type Stream string
+
+// Streams is a slice of Stream names.
+type Streams []Stream
+
+// Table is the name of a table topic in Kafka, ie, a topic with
+// cleanup.policy=compact
+type Table string
+
+// Group is the name of a consumer group in Kafka and represents a processor
+// group in Goka. A processor group may have a group table and a group loopback
+// stream. By default, the group table is named <group>-table and the loopback
+// stream <group>-loop.
+type Group string
+
+// GroupGraph is the specification of a processor group. It contains all input,
+// output, and any other topic from which and into which the processor group
+// may consume or produce events. Each of these links to Kafka is called Edge.
+type GroupGraph struct {
+ group string
+ inputTables []Edge
+ crossTables []Edge
+ inputStreams []Edge
+ outputStreams []Edge
+ loopStream []Edge
+ groupTable []Edge
+
+ codecs map[string]Codec
+ callbacks map[string]ProcessCallback
+
+ joinCheck map[string]bool
+}
+
+// Group returns the group name.
+func (gg *GroupGraph) Group() Group {
+ return Group(gg.group)
+}
+
+// InputStreams returns all input stream edges of the group.
+func (gg *GroupGraph) InputStreams() Edges {
+ return gg.inputStreams
+}
+
+// JointTables returns all joint table edges of the group.
+func (gg *GroupGraph) JointTables() Edges {
+ return gg.inputTables
+}
+
+// LookupTables returns all lookup table edges of the group.
+func (gg *GroupGraph) LookupTables() Edges {
+ return gg.crossTables
+}
+
+// LoopStream returns the loopback edge of the group.
+func (gg *GroupGraph) LoopStream() Edge {
+ // only 1 loop stream is valid
+ if len(gg.loopStream) > 0 {
+ return gg.loopStream[0]
+ }
+ return nil
+}
+
+// GroupTable returns the group table edge of the group.
+func (gg *GroupGraph) GroupTable() Edge {
+ // only 1 group table is valid
+ if len(gg.groupTable) > 0 {
+ return gg.groupTable[0]
+ }
+ return nil
+}
+
+// OutputStreams returns the output stream edges of the group.
+func (gg *GroupGraph) OutputStreams() Edges {
+ return gg.outputStreams
+}
+
+// inputs returns all input topics (tables and streams)
+func (gg *GroupGraph) inputs() Edges {
+ return append(append(gg.inputStreams, gg.inputTables...), gg.crossTables...)
+}
+
+// copartitioned returns all copartitioned topics (joint tables and input streams)
+func (gg *GroupGraph) copartitioned() Edges {
+ return append(gg.inputStreams, gg.inputTables...)
+}
+
+func (gg *GroupGraph) codec(topic string) Codec {
+ return gg.codecs[topic]
+}
+
+func (gg *GroupGraph) callback(topic string) ProcessCallback {
+ return gg.callbacks[topic]
+}
+
+func (gg *GroupGraph) joint(topic string) bool {
+ return gg.joinCheck[topic]
+}
+
+// DefineGroup creates a group graph with a given group name and a list of
+// edges.
+func DefineGroup(group Group, edges ...Edge) *GroupGraph {
+ gg := GroupGraph{group: string(group),
+ codecs: make(map[string]Codec),
+ callbacks: make(map[string]ProcessCallback),
+ joinCheck: make(map[string]bool),
+ }
+
+ for _, e := range edges {
+ switch e := e.(type) {
+ case inputStreams:
+ for _, input := range e {
+ gg.validateInputTopic(input.Topic())
+ inputStr := input.(*inputStream)
+ gg.codecs[input.Topic()] = input.Codec()
+ gg.callbacks[input.Topic()] = inputStr.cb
+ gg.inputStreams = append(gg.inputStreams, inputStr)
+ }
+ case *inputStream:
+ gg.validateInputTopic(e.Topic())
+ gg.codecs[e.Topic()] = e.Codec()
+ gg.callbacks[e.Topic()] = e.cb
+ gg.inputStreams = append(gg.inputStreams, e)
+ case *loopStream:
+ e.setGroup(group)
+ gg.codecs[e.Topic()] = e.Codec()
+ gg.callbacks[e.Topic()] = e.cb
+ gg.loopStream = append(gg.loopStream, e)
+ case *outputStream:
+ gg.codecs[e.Topic()] = e.Codec()
+ gg.outputStreams = append(gg.outputStreams, e)
+ case *inputTable:
+ gg.codecs[e.Topic()] = e.Codec()
+ gg.inputTables = append(gg.inputTables, e)
+ gg.joinCheck[e.Topic()] = true
+ case *crossTable:
+ gg.codecs[e.Topic()] = e.Codec()
+ gg.crossTables = append(gg.crossTables, e)
+ case *groupTable:
+ e.setGroup(group)
+ gg.codecs[e.Topic()] = e.Codec()
+ gg.groupTable = append(gg.groupTable, e)
+ }
+ }
+ return &gg
+}
+
+func (gg *GroupGraph) validateInputTopic(topic string) {
+ if topic == "" {
+ panic("Input topic cannot be empty. This will not work.")
+ }
+
+ if _, exists := gg.callbacks[topic]; exists {
+ panic(fmt.Errorf("Callback for topic %s already exists. It is illegal to consume a topic twice", topic))
+ }
+}
+
+// Validate validates the group graph and returns an error if invalid.
+// Main validation checks are:
+// - at most one loopback stream edge is allowed
+// - at most one group table edge is allowed
+// - at least one input stream is required
+// - table and loopback topics cannot be used in any other edge.
+func (gg *GroupGraph) Validate() error {
+ if len(gg.loopStream) > 1 {
+ return errors.New("more than one loop stream in group graph")
+ }
+ if len(gg.groupTable) > 1 {
+ return errors.New("more than one group table in group graph")
+ }
+ if len(gg.inputStreams) == 0 {
+ return errors.New("no input stream in group graph")
+ }
+ for _, t := range append(gg.outputStreams,
+ append(gg.inputStreams, append(gg.inputTables, gg.crossTables...)...)...) {
+ if t.Topic() == loopName(gg.Group()) {
+ return errors.New("should not directly use loop stream")
+ }
+ if t.Topic() == tableName(gg.Group()) {
+ return errors.New("should not directly use group table")
+ }
+ }
+ return nil
+}
+
+// Edge represents a topic in Kafka and the corresponding codec to encode and
+// decode the messages of that topic.
+type Edge interface {
+ String() string
+ Topic() string
+ Codec() Codec
+}
+
+// Edges is a slice of edge objects.
+type Edges []Edge
+
+// Topics returns the names of the topics of the edges.
+func (e Edges) Topics() []string {
+ var t []string
+ for _, i := range e {
+ t = append(t, i.Topic())
+ }
+ return t
+}
+
+type topicDef struct {
+ name string
+ codec Codec
+}
+
+func (t *topicDef) Topic() string {
+ return t.name
+}
+
+func (t *topicDef) String() string {
+ return fmt.Sprintf("%s/%T", t.name, t.codec)
+}
+
+func (t *topicDef) Codec() Codec {
+ return t.codec
+}
+
+type inputStream struct {
+ *topicDef
+ cb ProcessCallback
+}
+
+// Input represents an edge of an input stream topic. The edge
+// specifies the topic name, its codec and the ProcessCallback used to
+// process it. The topic has to be copartitioned with any other input stream of
+// the group and with the group table.
+// The group starts reading the topic from the newest offset.
+func Input(topic Stream, c Codec, cb ProcessCallback) Edge {
+ return &inputStream{&topicDef{string(topic), c}, cb}
+}
+
+type inputStreams Edges
+
+func (is inputStreams) String() string {
+ if is == nil {
+ return "empty input streams"
+ }
+
+ return fmt.Sprintf("input streams: %s/%T", is.Topic(), is.Codec())
+}
+
+func (is inputStreams) Topic() string {
+ if is == nil {
+ return ""
+ }
+ var topics []string
+
+ for _, stream := range is {
+ topics = append(topics, stream.Topic())
+ }
+ return strings.Join(topics, ",")
+}
+
+func (is inputStreams) Codec() Codec {
+ if is == nil {
+ return nil
+ }
+ return is[0].Codec()
+}
+
+// Inputs creates edges of multiple input streams sharing the same
+// codec and callback.
+func Inputs(topics Streams, c Codec, cb ProcessCallback) Edge {
+ if len(topics) == 0 {
+ return nil
+ }
+ var edges Edges
+ for _, topic := range topics {
+ edges = append(edges, Input(topic, c, cb))
+ }
+ return inputStreams(edges)
+}
+
+type loopStream inputStream
+
+// Loop represents the edge of the loopback topic of the group. The edge
+// specifies the codec of the messages in the topic and the ProcessCallback to
+// process the messages of the topic. Context.Loopback() is used to write
+// messages into this topic from any callback of the group.
+func Loop(c Codec, cb ProcessCallback) Edge {
+ return &loopStream{&topicDef{codec: c}, cb}
+}
+
+func (s *loopStream) setGroup(group Group) {
+ s.topicDef.name = loopName(group)
+}
+
+type inputTable struct {
+ *topicDef
+}
+
+// Join represents an edge of a copartitioned, log-compacted table topic. The
+// edge specifies the topic name and the codec of the messages of the topic.
+// The group starts reading the topic from the oldest offset.
+// The processing of input streams is blocked until all partitions of the table
+// are recovered.
+func Join(topic Table, c Codec) Edge {
+ return &inputTable{&topicDef{string(topic), c}}
+}
+
+type crossTable struct {
+ *topicDef
+}
+
+// Lookup represents an edge of a non-copartitioned, log-compacted table
+// topic. The edge specifies the topic name and the codec of the messages of
+// the topic. The group starts reading the topic from the oldest offset.
+// The processing of input streams is blocked until the table is fully
+// recovered.
+func Lookup(topic Table, c Codec) Edge {
+ return &crossTable{&topicDef{string(topic), c}}
+}
+
+type groupTable struct {
+ *topicDef
+}
+
+// Persist represents the edge of the group table, which is log-compacted and
+// copartitioned with the input streams. This edge specifies the codec of the
+// messages in the topic, ie, the codec of the values of the table.
+// The processing of input streams is blocked until all partitions of the group
+// table are recovered.
+func Persist(c Codec) Edge {
+ return &groupTable{&topicDef{codec: c}}
+}
+
+func (t *groupTable) setGroup(group Group) {
+ t.topicDef.name = string(GroupTable(group))
+}
+
+type outputStream struct {
+ *topicDef
+}
+
+// Output represents an edge of an output stream topic. The edge
+// specifies the topic name and the codec of the messages of the topic.
+// Context.Emit() only emits messages into Output edges defined in the group
+// graph.
+// The topic does not have to be copartitioned with the input streams.
+func Output(topic Stream, c Codec) Edge {
+ return &outputStream{&topicDef{string(topic), c}}
+}
+
+// GroupTable returns the name of the group table of group.
+func GroupTable(group Group) Table {
+ return Table(tableName(group))
+}
+
+func tableName(group Group) string {
+ return string(group) + tableSuffix
+}
+
+// loopName returns the name of the loop topic of group.
+func loopName(group Group) string {
+ return string(group) + loopSuffix
+}
diff --git a/vendor/github.com/lovoo/goka/iterator.go b/vendor/github.com/lovoo/goka/iterator.go
new file mode 100644
index 00000000..4a1c7b55
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/iterator.go
@@ -0,0 +1,59 @@
+package goka
+
+import (
+ "github.com/lovoo/goka/storage"
+)
+
+// Iterator allows one to iterate over the keys of a view.
+type Iterator interface {
+ // Next advances the iterator to the next KV-pair. Err should be called
+ // after Next returns false to check whether the iteration finished
+ // from exhaustion or was aborted due to an error.
+ Next() bool
+ Key() string
+ Value() (interface{}, error)
+ Release()
+ // Err returns the possible iteration error.
+ Err() error
+ Seek(key string) bool
+}
+
+type iterator struct {
+ iter storage.Iterator
+ codec Codec
+}
+
+// Next advances the iterator to the next key.
+func (i *iterator) Next() bool {
+ return i.iter.Next()
+}
+
+// Key returns the current key.
+func (i *iterator) Key() string {
+ return string(i.iter.Key())
+}
+
+// Value returns the current value decoded by the codec of the storage.
+func (i *iterator) Value() (interface{}, error) {
+ data, err := i.iter.Value()
+ if err != nil {
+ return nil, err
+ } else if data == nil {
+ return nil, nil
+ }
+ return i.codec.Decode(data)
+}
+
+// Err returns the possible iteration error.
+func (i *iterator) Err() error {
+ return i.iter.Err()
+}
+
+// Release releases the iterator. The iterator is not usable anymore after calling Release.
+func (i *iterator) Release() {
+ i.iter.Release()
+}
+
+func (i *iterator) Seek(key string) bool {
+ return i.iter.Seek([]byte(key))
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/builders.go b/vendor/github.com/lovoo/goka/kafka/builders.go
new file mode 100644
index 00000000..52a9d061
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/builders.go
@@ -0,0 +1,80 @@
+package kafka
+
+import (
+ "hash"
+
+ "github.com/Shopify/sarama"
+ cluster "github.com/bsm/sarama-cluster"
+)
+
+// ConsumerBuilder creates a Kafka consumer.
+type ConsumerBuilder func(brokers []string, group, clientID string) (Consumer, error)
+
+// DefaultConsumerBuilder creates a Kafka consumer using the Sarama library.
+func DefaultConsumerBuilder(brokers []string, group, clientID string) (Consumer, error) {
+ config := NewConfig()
+ config.ClientID = clientID
+ return NewSaramaConsumer(brokers, group, config)
+}
+
+// ConsumerBuilderWithConfig creates a ConsumerBuilder that uses the passed sarama-cluster configuration.
+func ConsumerBuilderWithConfig(config *cluster.Config) ConsumerBuilder {
+ return func(brokers []string, group, clientID string) (Consumer, error) {
+ config.ClientID = clientID
+ return NewSaramaConsumer(brokers, group, config)
+ }
+}
+
+// ProducerBuilder create a Kafka producer.
+type ProducerBuilder func(brokers []string, clientID string, hasher func() hash.Hash32) (Producer, error)
+
+// DefaultProducerBuilder creates a Kafka producer using the Sarama library.
+func DefaultProducerBuilder(brokers []string, clientID string, hasher func() hash.Hash32) (Producer, error) {
+ config := NewConfig()
+ config.ClientID = clientID
+ config.Producer.Partitioner = sarama.NewCustomHashPartitioner(hasher)
+ return NewProducer(brokers, &config.Config)
+}
+
+// ProducerBuilderWithConfig creates a ProducerBuilder that uses the passed sarama-cluster configuration.
+func ProducerBuilderWithConfig(config *cluster.Config) ProducerBuilder {
+ return func(brokers []string, clientID string, hasher func() hash.Hash32) (Producer, error) {
+ config.ClientID = clientID
+ config.Producer.Partitioner = sarama.NewCustomHashPartitioner(hasher)
+ return NewProducer(brokers, &config.Config)
+ }
+}
+
+// TopicManagerBuilder creates a TopicManager to check partition counts and
+// create tables.
+type TopicManagerBuilder func(brokers []string) (TopicManager, error)
+
+// DefaultTopicManagerBuilder creates TopicManager using the Sarama library.
+// This topic manager cannot create topics.
+func DefaultTopicManagerBuilder(brokers []string) (TopicManager, error) {
+ return NewSaramaTopicManager(brokers, sarama.NewConfig())
+}
+
+// TopicManagerBuilderWithConfig creates TopicManager using the Sarama library.
+// This topic manager cannot create topics.
+func TopicManagerBuilderWithConfig(config *cluster.Config) TopicManagerBuilder {
+ return func(brokers []string) (TopicManager, error) {
+ return NewSaramaTopicManager(brokers, &config.Config)
+ }
+}
+
+// ZKTopicManagerBuilder creates a TopicManager that connects with ZooKeeper to
+// check partition counts and create tables.
+func ZKTopicManagerBuilder(servers []string) TopicManagerBuilder {
+ return func([]string) (TopicManager, error) {
+ return NewTopicManager(servers, NewTopicManagerConfig())
+ }
+}
+
+// ZKTopicManagerBuilderWithConfig creates a TopicManager that connects with ZooKeeper to
+// check partition counts and create tables given a topic configuration.
+func ZKTopicManagerBuilderWithConfig(servers []string, config *TopicManagerConfig) TopicManagerBuilder {
+ return func([]string) (TopicManager, error) {
+ return NewTopicManager(servers, config)
+ }
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/config.go b/vendor/github.com/lovoo/goka/kafka/config.go
new file mode 100644
index 00000000..a1e599aa
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/config.go
@@ -0,0 +1,33 @@
+package kafka
+
+import (
+ "github.com/Shopify/sarama"
+ cluster "github.com/bsm/sarama-cluster"
+)
+
+// NewConfig creates a (bsm) sarama configuration with default values.
+func NewConfig() *cluster.Config {
+ config := cluster.NewConfig()
+ config.Version = sarama.V0_10_1_0
+
+ // consumer configuration
+ config.Consumer.Return.Errors = true
+ config.Consumer.MaxProcessingTime = defaultMaxProcessingTime
+ // this configures the initial offset for streams. Tables are always
+ // consumed from OffsetOldest.
+ config.Consumer.Offsets.Initial = sarama.OffsetNewest
+
+ // producer configuration
+ config.Producer.RequiredAcks = sarama.WaitForLocal
+ config.Producer.Compression = sarama.CompressionSnappy
+ config.Producer.Flush.Frequency = defaultFlushFrequency
+ config.Producer.Flush.Bytes = defaultFlushBytes
+ config.Producer.Return.Successes = true
+ config.Producer.Return.Errors = true
+ config.Producer.Retry.Max = defaultProducerMaxRetries
+
+ // consumer group configuration
+ config.Group.Return.Notifications = true
+
+ return config
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/consumer.go b/vendor/github.com/lovoo/goka/kafka/consumer.go
new file mode 100644
index 00000000..a634b109
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/consumer.go
@@ -0,0 +1,118 @@
+package kafka
+
+import (
+ "time"
+
+ "github.com/Shopify/sarama"
+ cluster "github.com/bsm/sarama-cluster"
+ "github.com/lovoo/goka/multierr"
+)
+
+const (
+ // size of sarama buffer for consumer and producer
+ defaultChannelBufferSize = 256
+
+ // time sarama-cluster assumes the processing of an event may take
+ defaultMaxProcessingTime = 1 * time.Second
+
+ // producer flush configuration
+ defaultFlushFrequency = 100 * time.Millisecond
+ defaultFlushBytes = 64 * 1024
+ defaultProducerMaxRetries = 10
+)
+
+const (
+ // OffsetNewest defines the newest offset to read from using the consumer
+ OffsetNewest = -1
+ // OffsetOldest defines the oldest offset to read from using the consumer
+ OffsetOldest = -2
+)
+
+// Consumer abstracts a kafka consumer
+type Consumer interface {
+ Events() <-chan Event
+
+ // group consume assumes co-partitioned topics
+ // define input topics to consume
+ Subscribe(topics map[string]int64) error
+ // marks the consumer ready to start consuming the messages
+ AddGroupPartition(partition int32)
+ Commit(topic string, partition int32, offset int64) error
+
+ // consume individual topic/partitions
+ AddPartition(topic string, partition int32, initialOffset int64) error
+ RemovePartition(topic string, partition int32) error
+
+ // Close closes the events channel
+ Close() error
+}
+
+type saramaConsumer struct {
+ groupConsumer *groupConsumer
+ simpleConsumer *simpleConsumer
+ events chan Event
+}
+
+// NewSaramaConsumer creates a new Consumer using sarama
+func NewSaramaConsumer(brokers []string, group string, config *cluster.Config) (Consumer, error) {
+ chsize := config.Config.ChannelBufferSize
+ if chsize == 0 {
+ chsize = defaultChannelBufferSize
+ }
+ events := make(chan Event, chsize)
+
+ g, err := newGroupConsumer(brokers, group, events, config)
+ if err != nil {
+ return nil, err
+ }
+
+ // since the simple consumer only handles tables, be sure to start from the oldest offset
+ simpleConfig := config.Config // copy config
+ simpleConfig.Consumer.Offsets.Initial = sarama.OffsetOldest
+ c, err := newSimpleConsumer(brokers, events, &simpleConfig)
+ if err != nil {
+ return nil, err
+ }
+
+ return &saramaConsumer{
+ groupConsumer: g,
+ simpleConsumer: c,
+ events: events,
+ }, nil
+}
+
+func (c *saramaConsumer) Close() error {
+ // we want to close the events-channel regardless of any errors closing
+ // the consumers
+ defer close(c.events)
+ var errs multierr.Errors
+ if err := c.simpleConsumer.Close(); err != nil {
+ errs.Collect(err)
+ }
+ if err := c.groupConsumer.Close(); err != nil {
+ errs.Collect(err)
+ }
+ return errs.NilOrError()
+}
+
+func (c *saramaConsumer) Events() <-chan Event {
+ return c.events
+}
+
+// group consume assumes co-partitioned topics
+func (c *saramaConsumer) Subscribe(topics map[string]int64) error {
+ return c.groupConsumer.Subscribe(topics)
+}
+func (c *saramaConsumer) AddGroupPartition(partition int32) {
+ c.groupConsumer.AddGroupPartition(partition)
+}
+func (c *saramaConsumer) Commit(topic string, partition int32, offset int64) error {
+ return c.groupConsumer.Commit(topic, partition, offset)
+}
+
+func (c *saramaConsumer) AddPartition(topic string, partition int32, initialOffset int64) error {
+ return c.simpleConsumer.AddPartition(topic, partition, int64(initialOffset))
+}
+func (c *saramaConsumer) RemovePartition(topic string, partition int32) error {
+ return c.simpleConsumer.RemovePartition(topic, partition)
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/event.go b/vendor/github.com/lovoo/goka/kafka/event.go
new file mode 100644
index 00000000..76e7e5f6
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/event.go
@@ -0,0 +1,78 @@
+package kafka
+
+import (
+ "fmt"
+ "time"
+)
+
+// Event abstracts different types of events from the kafka consumer like BOF/EOF/Error or an actual message
+type Event interface {
+ string() string
+}
+
+// Assignment represents a partition:offset assignment for the current connection
+type Assignment map[int32]int64
+
+func (a *Assignment) string() string {
+ var am map[int32]int64 = *a
+ return fmt.Sprintf("Assignment %v", am)
+}
+
+// EOF marks the end of the log of a topic/partition.
+type EOF struct {
+ Topic string
+ Partition int32
+ Hwm int64
+}
+
+func (e *EOF) string() string {
+ return fmt.Sprintf("EOF %s/%d:%d", e.Topic, e.Partition, e.Hwm)
+}
+
+// BOF marks the beginning of a topic/partition.
+type BOF struct {
+ Topic string
+ Partition int32
+ Offset int64
+ Hwm int64
+}
+
+func (e *BOF) string() string {
+ return fmt.Sprintf("BOF %s/%d:%d->%d", e.Topic, e.Partition, e.Offset, e.Hwm)
+}
+
+// Message represents a message from kafka containing
+// extra information like topic, partition and offset for convenience
+type Message struct {
+ Topic string
+ Partition int32
+ Offset int64
+ Timestamp time.Time
+ Header map[string][]byte
+
+ Key string
+ Value []byte
+}
+
+func (m *Message) string() string {
+ return fmt.Sprintf("Message %s/%d:%d %s=%v", m.Topic, m.Partition, m.Offset, m.Key, m.Value)
+}
+
+// Error wraps an error from Kafka to conform to the Event interface
+type Error struct {
+ Err error
+}
+
+func (e *Error) string() string {
+ return e.Err.Error()
+}
+
+// NOP does not carry any information. Useful for debugging.
+type NOP struct {
+ Topic string
+ Partition int32
+}
+
+func (n *NOP) string() string {
+ return "nop"
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/group_consumer.go b/vendor/github.com/lovoo/goka/kafka/group_consumer.go
new file mode 100644
index 00000000..8128fb63
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/group_consumer.go
@@ -0,0 +1,279 @@
+package kafka
+
+import (
+ "fmt"
+ "log"
+ "sync/atomic"
+
+ "github.com/Shopify/sarama"
+ cluster "github.com/bsm/sarama-cluster"
+)
+
+type groupConsumer struct {
+ brokers []string
+ config *cluster.Config
+ consumer clusterConsumer
+
+ group string
+ partitionMap map[int32]bool
+ addPartition chan int32
+
+ events chan Event
+ stop chan bool
+ done chan bool
+ running int64
+}
+
+func newGroupConsumer(brokers []string, group string, events chan Event, config *cluster.Config) (*groupConsumer, error) {
+ return &groupConsumer{
+ group: group,
+ brokers: brokers,
+ config: config,
+ partitionMap: make(map[int32]bool),
+ addPartition: make(chan int32, 2048),
+ events: events,
+ stop: make(chan bool),
+ done: make(chan bool),
+ }, nil
+}
+
+func (c *groupConsumer) Close() error {
+ if atomic.LoadInt64(&c.running) == 0 {
+ // not running
+ return nil
+ }
+ close(c.stop)
+ <-c.done
+ if err := c.consumer.Close(); err != nil {
+ return fmt.Errorf("Failed to close consumer: %v", err)
+ }
+ return nil
+}
+
+func (c *groupConsumer) Subscribe(topics map[string]int64) error {
+ var ts []string
+ for t := range topics {
+ ts = append(ts, string(t))
+ }
+ upConsumer, err := cluster.NewConsumer(c.brokers, c.group, ts, c.config)
+ if err != nil {
+ return err
+ }
+ c.consumer = upConsumer
+
+ go c.run()
+
+ return nil
+}
+
+func (c *groupConsumer) waitForRebalanceOK() bool {
+ for {
+ select {
+ case n := <-c.consumer.Notifications():
+ if !c.checkRebalance(cluster.RebalanceStart, n.Type) {
+ continue
+ }
+
+ select {
+ case nn := <-c.consumer.Notifications():
+ if !c.checkRebalance(cluster.RebalanceOK, nn.Type) {
+ continue
+ }
+
+ return c.handleRebalanceOK(nn)
+ case <-c.stop:
+ return false
+ }
+ case err := <-c.consumer.Errors():
+ select {
+ case c.events <- &Error{err}:
+ case <-c.stop:
+ return false
+ }
+ case <-c.stop:
+ return false
+ }
+ }
+}
+
+func (c *groupConsumer) checkRebalance(expected, actual cluster.NotificationType) bool {
+ if actual != expected {
+ select {
+ case c.events <- &Error{fmt.Errorf("expected %s but received %s", expected, actual)}:
+ case <-c.stop:
+ }
+
+ return false
+ }
+
+ return true
+}
+
+func (c *groupConsumer) handleRebalanceOK(n *cluster.Notification) bool {
+ if n.Type != cluster.RebalanceOK {
+ // panic as this is a programming error
+ log.Panicf("GroupConsumer: unsupported notification type in handleRebalanceOK: %v/%s", n.Type, n.Type)
+ }
+
+ // save partition map
+ m := c.partitionMap
+ c.partitionMap = make(map[int32]bool)
+
+ // create assignment and update partitionMap
+ a := make(Assignment)
+ for _, v := range n.Current {
+ for _, p := range v {
+ a[p] = sarama.OffsetNewest
+
+ // remember whether partition was added using m[p]
+ c.partitionMap[p] = m[p]
+ }
+
+ break // copartitioned topics
+ }
+
+ // send assignment
+ select {
+ case c.events <- &a:
+ return true
+ case <-c.stop:
+ return false
+ }
+}
+
+// returns true if all partitions are registered. otherwise false
+func (c *groupConsumer) partitionsRegistered() bool {
+ for _, v := range c.partitionMap {
+ if !v {
+ return false
+ }
+ }
+ return true
+}
+
+func (c *groupConsumer) AddGroupPartition(partition int32) {
+ select {
+ case c.addPartition <- partition:
+ case <-c.stop:
+ }
+}
+
+func (c *groupConsumer) waitForPartitions() bool {
+ defer c.ensureEmpty()
+
+ // if all registered, start consuming
+ if c.partitionsRegistered() {
+ return true
+ }
+
+ for {
+ select {
+ case par := <-c.addPartition:
+ c.partitionMap[par] = true
+
+ // if all registered, start consuming
+ if c.partitionsRegistered() {
+ return true
+ }
+
+ case <-c.stop:
+ return false
+ }
+ }
+}
+
+func (c *groupConsumer) ensureEmpty() {
+ for {
+ select {
+ case <-c.addPartition:
+ default:
+ return
+ }
+ }
+}
+
+func (c *groupConsumer) waitForMessages() bool {
+ for {
+ select {
+ case n := <-c.consumer.Notifications():
+ if !c.checkRebalance(cluster.RebalanceStart, n.Type) {
+ continue
+ }
+
+ select {
+ case nn := <-c.consumer.Notifications():
+ if !c.checkRebalance(cluster.RebalanceOK, nn.Type) {
+ continue
+ }
+
+ return c.handleRebalanceOK(nn)
+ case <-c.stop:
+ return false
+ }
+ case msg := <-c.consumer.Messages():
+
+ headers := make(map[string][]byte)
+ for _, header := range msg.Headers {
+ headers[string(header.Key)] = header.Value
+ }
+
+ select {
+ case c.events <- &Message{
+ Topic: msg.Topic,
+ Partition: msg.Partition,
+ Offset: msg.Offset,
+ Timestamp: msg.Timestamp,
+ Key: string(msg.Key),
+ Value: msg.Value,
+ Header: headers,
+ }:
+ case <-c.stop:
+ return false
+ }
+
+ case err := <-c.consumer.Errors():
+ select {
+ case c.events <- &Error{err}:
+ case <-c.stop:
+ return false
+ }
+
+ case <-c.stop:
+ return false
+ }
+ }
+}
+
+func (c *groupConsumer) run() {
+ atomic.AddInt64(&c.running, 1)
+ defer close(c.done)
+
+ if !c.waitForRebalanceOK() {
+ return
+ }
+
+ for {
+ if !c.waitForPartitions() {
+ return
+ }
+
+ if !c.waitForMessages() {
+ return
+ }
+ }
+}
+
+func (c *groupConsumer) Commit(topic string, partition int32, offset int64) error {
+ c.consumer.MarkPartitionOffset(topic, partition, offset, "")
+ return nil
+}
+
+//go:generate mockgen -package mock -destination=mock/cluster_consumer.go -source=group_consumer.go clusterConsumer
+type clusterConsumer interface {
+ Close() error
+ MarkPartitionOffset(topic string, partition int32, offset int64, metadata string)
+
+ Notifications() <-chan *cluster.Notification
+ Messages() <-chan *sarama.ConsumerMessage
+ Errors() <-chan error
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/producer.go b/vendor/github.com/lovoo/goka/kafka/producer.go
new file mode 100644
index 00000000..c576be6d
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/producer.go
@@ -0,0 +1,101 @@
+package kafka
+
+import (
+ "fmt"
+ "sync"
+ "time"
+
+ "github.com/Shopify/sarama"
+)
+
+// Producer abstracts the kafka producer
+type Producer interface {
+ // Emit sends a message to topic.
+ Emit(topic string, key string, value []byte) *Promise
+ Close() error
+}
+
+type producer struct {
+ producer sarama.AsyncProducer
+ wg sync.WaitGroup
+}
+
+// NewProducer creates new kafka producer for passed brokers.
+func NewProducer(brokers []string, config *sarama.Config) (Producer, error) {
+ aprod, err := sarama.NewAsyncProducer(brokers, config)
+ if err != nil {
+ return nil, fmt.Errorf("Failed to start Sarama producer: %v", err)
+ }
+
+ p := producer{
+ producer: aprod,
+ }
+
+ p.run()
+
+ return &p, nil
+}
+
+// Close stops the producer and waits for the Success/Error channels to drain.
+// Emitting to a closing/closed producer results in a write-to-closed-channel panic.
+func (p *producer) Close() error {
+ // do an async close to get the rest of the success/error messages to avoid
+ // leaving unfinished promises.
+ p.producer.AsyncClose()
+
+ // wait for the channels to drain
+ done := make(chan struct{})
+ go func() {
+ p.wg.Wait()
+ close(done)
+ }()
+
+ select {
+ case <-done:
+ case <-time.NewTimer(60 * time.Second).C:
+ }
+
+ return nil
+}
+
+// Emit emits a key-value pair to topic and returns a Promise that
+// can be checked for errors asynchronously
+func (p *producer) Emit(topic string, key string, value []byte) *Promise {
+ promise := NewPromise()
+ p.producer.Input() <- &sarama.ProducerMessage{
+ Topic: topic,
+ Key: sarama.StringEncoder(key),
+ Value: sarama.ByteEncoder(value),
+ Metadata: promise,
+ }
+ return promise
+}
+
+// resolve or reject a promise in the message's metadata on Success or Error
+func (p *producer) run() {
+ p.wg.Add(2)
+ go func() {
+ defer p.wg.Done()
+ for {
+ err, ok := <-p.producer.Errors()
+
+ // channel closed, the producer is stopping
+ if !ok {
+ return
+ }
+ err.Msg.Metadata.(*Promise).Finish(err.Err)
+ }
+ }()
+
+ go func() {
+ defer p.wg.Done()
+ for {
+ msg, ok := <-p.producer.Successes()
+ // channel closed, the producer is stopping
+ if !ok {
+ return
+ }
+ msg.Metadata.(*Promise).Finish(nil)
+ }
+ }()
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/promise.go b/vendor/github.com/lovoo/goka/kafka/promise.go
new file mode 100644
index 00000000..c2f69c7e
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/promise.go
@@ -0,0 +1,57 @@
+package kafka
+
+import "sync"
+
+// Promise as in https://en.wikipedia.org/wiki/Futures_and_promises
+type Promise struct {
+ sync.Mutex
+ err error
+ finished bool
+
+ callbacks []func(err error)
+}
+
+// NewPromise creates a new Promise
+func NewPromise() *Promise {
+ return new(Promise)
+}
+
+// execute all callbacks conveniently
+// The caller needs to lock!
+func (p *Promise) executeCallbacks() {
+ // already resolved
+ if p.finished {
+ return
+ }
+ for _, s := range p.callbacks {
+ s(p.err)
+ }
+ // mark as finished
+ p.finished = true
+}
+
+// Then chains a callback to the Promise
+func (p *Promise) Then(s func(err error)) *Promise {
+ p.Lock()
+ defer p.Unlock()
+
+ // promise already run, call the callback immediately
+ if p.finished {
+ s(p.err)
+ // append it to the subscribers otherwise
+ } else {
+ p.callbacks = append(p.callbacks, s)
+ }
+ return p
+}
+
+// Finish finishes the promise by executing all callbacks and saving the message/error for late subscribers
+func (p *Promise) Finish(err error) *Promise {
+ p.Lock()
+ defer p.Unlock()
+
+ p.err = err
+
+ p.executeCallbacks()
+ return p
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/simple_consumer.go b/vendor/github.com/lovoo/goka/kafka/simple_consumer.go
new file mode 100644
index 00000000..04deb513
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/simple_consumer.go
@@ -0,0 +1,253 @@
+package kafka
+
+import (
+ "fmt"
+ "sync"
+
+ "github.com/Shopify/sarama"
+)
+
+type topicPartition struct {
+ topic string
+ partition int32
+}
+
+type simpleConsumer struct {
+ client sarama.Client
+ consumer sarama.Consumer
+ partitions map[topicPartition]sarama.PartitionConsumer
+ m sync.Mutex
+
+ events chan Event
+ dying chan bool
+
+ wg sync.WaitGroup
+}
+
+func newSimpleConsumer(brokers []string, events chan Event, config *sarama.Config) (*simpleConsumer, error) {
+ client, err := sarama.NewClient(brokers, config)
+ if err != nil {
+ return nil, fmt.Errorf("Cannot connect to kafka: %v", err)
+ }
+
+ consumer, err := sarama.NewConsumerFromClient(client)
+ if err != nil {
+ return nil, fmt.Errorf("Cannot create consumer: %v", err)
+ }
+
+ return &simpleConsumer{
+ client: client,
+ consumer: consumer,
+ events: events,
+ dying: make(chan bool),
+ partitions: make(map[topicPartition]sarama.PartitionConsumer),
+ }, nil
+}
+
+func (c *simpleConsumer) Close() error {
+ // stop any blocking writes to channels
+ close(c.dying)
+
+ c.m.Lock()
+ defer c.m.Unlock()
+ for tp, pc := range c.partitions {
+ pc.AsyncClose()
+ delete(c.partitions, tp)
+ }
+
+ // wait until all partition consumers have finished
+ c.wg.Wait()
+
+ if err := c.consumer.Close(); err != nil {
+ return fmt.Errorf("Failed to close consumer: %v", err)
+ }
+
+ if err := c.client.Close(); err != nil {
+ return fmt.Errorf("Failed to close client in consumer: %v", err)
+ }
+ return nil
+}
+
+func (c *simpleConsumer) AddPartition(topic string, partition int32, offset int64) error {
+ c.m.Lock()
+ defer c.m.Unlock()
+ tp := topicPartition{topic, partition}
+ if _, has := c.partitions[tp]; has {
+ return fmt.Errorf("%s/%d already added", topic, partition)
+ }
+
+ // find best offset
+ start, hwm, err := c.getOffsets(topic, partition, offset)
+ if err != nil {
+ return fmt.Errorf("error getting offsets %s/%d: %v", topic, partition, err)
+ }
+
+ pc, err := c.consumer.ConsumePartition(topic, partition, start)
+ if err != nil {
+ return fmt.Errorf("error creating consumer for %s/%d: %v", topic, partition, err)
+ }
+ c.partitions[tp] = pc
+
+ c.wg.Add(1)
+ go func() {
+ defer c.wg.Done()
+ defer func() {
+ if err := recover(); err != nil {
+ c.events <- &Error{
+ Err: fmt.Errorf("panic: %v", err),
+ }
+ }
+ }()
+ c.run(pc, topic, partition, start, hwm)
+ }()
+ return nil
+}
+
+func (c *simpleConsumer) run(pc sarama.PartitionConsumer, topic string, partition int32, start, hwm int64) {
+ // mark beginning of partition consumption
+ select {
+ case c.events <- &BOF{
+ Topic: topic,
+ Partition: partition,
+ Offset: start,
+ Hwm: hwm,
+ }:
+ case <-c.dying:
+ return
+ }
+
+ // generate EOF if nothing to consume
+ if start == hwm {
+ select {
+ case c.events <- &EOF{
+ Topic: topic,
+ Partition: partition,
+ Hwm: hwm,
+ }:
+ case <-c.dying:
+ return
+ }
+ }
+
+ count := 0
+ // wait for messages to arrive
+ for {
+ select {
+ case m, ok := <-pc.Messages():
+ if !ok {
+ // Partition was removed. Continue to loop until errors are also
+ // drained.
+ continue
+ }
+
+ headers := make(map[string][]byte)
+ for _, header := range m.Headers {
+ headers[string(header.Key)] = header.Value
+ }
+
+ select {
+ case c.events <- &Message{
+ Topic: m.Topic,
+ Partition: m.Partition,
+ Offset: m.Offset,
+ Key: string(m.Key),
+ Value: m.Value,
+ Timestamp: m.Timestamp,
+ Header: headers,
+ }:
+ case <-c.dying:
+ return
+ }
+
+ if m.Offset == pc.HighWaterMarkOffset()-1 {
+ select {
+ case c.events <- &EOF{
+ Topic: m.Topic,
+ Partition: m.Partition,
+ Hwm: m.Offset + 1,
+ }:
+ case <-c.dying:
+ return
+ }
+ }
+
+ count++
+ if count%1000 == 0 && m.Offset >= hwm { // was this EOF?
+ select {
+ case c.events <- &EOF{
+ Topic: m.Topic,
+ Partition: m.Partition,
+ Hwm: pc.HighWaterMarkOffset(),
+ }:
+ case <-c.dying:
+ return
+ }
+ }
+ case err, ok := <-pc.Errors():
+ if !ok {
+ // Partition was removed.
+ return
+ }
+ select {
+ case c.events <- &Error{
+ Err: err,
+ }:
+ case <-c.dying:
+ return
+ }
+ return
+ case <-c.dying:
+ // Only closed when simple_consumer was closed, not when partitions are removed.
+ return
+ }
+ }
+}
+
+func (c *simpleConsumer) RemovePartition(topic string, partition int32) error {
+ tp := topicPartition{topic, partition}
+ c.m.Lock()
+ defer c.m.Unlock()
+ pc, has := c.partitions[tp]
+ if !has {
+ return fmt.Errorf("%s/%d was not added", topic, partition)
+ }
+ delete(c.partitions, tp)
+
+ if err := pc.Close(); err != nil {
+ return fmt.Errorf("error closing consumer for %s/%d: %v", topic, partition, err)
+ }
+
+ return nil
+}
+
+func (c *simpleConsumer) getOffsets(topic string, partition int32, offset int64) (start, hwm int64, err error) {
+ // check if there is anything to consume in topic/partition
+ oldest, err := c.client.GetOffset(topic, partition, sarama.OffsetOldest)
+ if err != nil {
+ err = fmt.Errorf("Error reading oldest log offset from kafka: %v", err)
+ return
+ }
+
+ // get HighWaterMark
+ hwm, err = c.client.GetOffset(topic, partition, sarama.OffsetNewest)
+ if err != nil {
+ err = fmt.Errorf("Error reading current log offset from kafka: %v", err)
+ return
+ }
+
+ start = offset
+
+ if offset == sarama.OffsetOldest {
+ start = oldest
+ } else if offset == sarama.OffsetNewest {
+ start = hwm
+ }
+
+ if start > hwm {
+ start = hwm
+ }
+ if start < oldest {
+ start = oldest
+ }
+ return
+}
diff --git a/vendor/github.com/lovoo/goka/kafka/topic_manager.go b/vendor/github.com/lovoo/goka/kafka/topic_manager.go
new file mode 100644
index 00000000..6e023f99
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/kafka/topic_manager.go
@@ -0,0 +1,292 @@
+package kafka
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/Shopify/sarama"
+ kazoo "github.com/wvanbergen/kazoo-go"
+)
+
+// TopicManager provides an interface to create/check topics and their partitions
+type TopicManager interface {
+ // EnsureTableExists checks that a table (log-compacted topic) exists, or create one if possible
+ EnsureTableExists(topic string, npar int) error
+ // EnsureStreamExists checks that a stream topic exists, or create one if possible
+ EnsureStreamExists(topic string, npar int) error
+ // EnsureTopicExists checks that a topic exists, or creates one if possible,
+ // enforcing the given configuration
+ EnsureTopicExists(topic string, npar, rfactor int, config map[string]string) error
+
+ // Partitions returns the partitions of a topic that are assigned to the running
+ // instance, i.e. it doesn't necessarily represent all partitions of a topic.
+ Partitions(topic string) ([]int32, error)
+
+ // Close closes the topic manager
+ Close() error
+}
+
+type saramaTopicManager struct {
+ brokers []string
+ client sarama.Client
+}
+
+// NewSaramaTopicManager creates a new topic manager using the sarama library
+func NewSaramaTopicManager(brokers []string, config *sarama.Config) (TopicManager, error) {
+ client, err := sarama.NewClient(brokers, config)
+ if err != nil {
+ return nil, fmt.Errorf("Error creating the kafka client: %v", err)
+ }
+
+ return &saramaTopicManager{
+ brokers: brokers,
+ client: client,
+ }, nil
+}
+
+func (m *saramaTopicManager) Close() error {
+ return m.client.Close()
+}
+
+func (m *saramaTopicManager) Partitions(topic string) ([]int32, error) {
+ return m.client.Partitions(topic)
+}
+
+func (m *saramaTopicManager) EnsureStreamExists(topic string, npar int) error {
+ return m.EnsureTableExists(topic, npar)
+}
+
+func (m *saramaTopicManager) EnsureTableExists(topic string, npar int) error {
+ par, err := m.client.Partitions(topic)
+ if err != nil {
+ return fmt.Errorf("could not ensure %s exists: %v", topic, err)
+ }
+ if len(par) != npar {
+ return fmt.Errorf("topic %s has %d partitions instead of %d", topic, len(par), npar)
+ }
+ return nil
+}
+
+func (m *saramaTopicManager) EnsureTopicExists(topic string, npar, rfactor int, config map[string]string) error {
+ return fmt.Errorf("not implemented in SaramaTopicManager")
+}
+
+// TopicManagerConfig contains the configuration to access the Zookeeper servers
+// as well as the desired options for creating tables and stream topics.
+type TopicManagerConfig struct {
+ Table struct {
+ Replication int
+ }
+ Stream struct {
+ Replication int
+ Retention time.Duration
+ }
+}
+
+type topicManager struct {
+ zk kzoo
+ servers []string
+ config *TopicManagerConfig
+}
+
+// NewTopicManagerConfig provides a default configuration for auto-creation
+// with a replication factor of 2 and a retention time of 1 hour.
+func NewTopicManagerConfig() *TopicManagerConfig {
+ cfg := new(TopicManagerConfig)
+ cfg.Table.Replication = 2
+ cfg.Stream.Replication = 2
+ cfg.Stream.Retention = 1 * time.Hour
+ return cfg
+}
+
+// NewTopicManager creates a new topic manager for managing topics with zookeeper
+func NewTopicManager(servers []string, config *TopicManagerConfig) (TopicManager, error) {
+ servers, chroot, err := updateChroot(servers)
+ if err != nil {
+ return nil, err
+ }
+
+ cfg := kazoo.NewConfig()
+ cfg.Chroot = chroot
+
+ if config == nil {
+ config = NewTopicManagerConfig()
+ }
+
+ kzoo, err := kazoo.NewKazoo(servers, cfg)
+ if err != nil {
+ return nil, fmt.Errorf("could not connect to ZooKeeper: %v", err)
+ }
+
+ return &topicManager{
+ zk: kzoo,
+ config: config,
+ }, nil
+}
+
+func (m *topicManager) Close() error {
+ return m.zk.Close()
+}
+
+func (m *topicManager) EnsureTableExists(topic string, npar int) error {
+ err := checkTopic(
+ m.zk, topic, npar,
+ m.config.Table.Replication,
+ map[string]string{"cleanup.policy": "compact"},
+ false,
+ )
+ if err != nil {
+ return err
+ }
+ // check number of partitions
+ return m.checkPartitions(topic, npar)
+}
+
+func (m *topicManager) EnsureStreamExists(topic string, npar int) error {
+ retention := int(m.config.Stream.Retention.Nanoseconds() / time.Millisecond.Nanoseconds())
+ err := checkTopic(
+ m.zk, topic, npar,
+ m.config.Stream.Replication,
+ map[string]string{"retention.ms": strconv.Itoa(retention)},
+ false,
+ )
+ if err != nil {
+ return err
+ }
+ return m.checkPartitions(topic, npar)
+}
+
+func (m *topicManager) EnsureTopicExists(topic string, npar, rfactor int, config map[string]string) error {
+ if err := checkTopic(m.zk, topic, npar, rfactor, config, true); err != nil {
+ return err
+ }
+ return m.checkPartitions(topic, npar)
+}
+
+func (m *topicManager) Partitions(topic string) ([]int32, error) {
+ tl, err := m.zk.Topics()
+ if err != nil {
+ return nil, err
+ }
+ t := tl.Find(topic)
+ if t == nil {
+ return nil, nil
+ }
+
+ pl, err := t.Partitions()
+ if err != nil {
+ return nil, err
+ }
+ var partitions []int32
+ for _, p := range pl {
+ partitions = append(partitions, p.ID)
+ }
+ return partitions, nil
+}
+
+// checkTopic ensures that the topic exists and, if ensureConfig is set, that its configuration matches cfg
+func checkTopic(kz kzoo, topic string, npar int, rfactor int, cfg map[string]string, ensureConfig bool) error {
+ ok, err := hasTopic(kz, topic)
+ if err != nil {
+ return err
+ }
+ if !ok {
+ err = kz.CreateTopic(topic, npar, rfactor, cfg)
+ if err != nil {
+ return err
+ }
+ }
+ if !ensureConfig {
+ return nil
+ }
+ // topic exists, check if the config is the same
+ c, err := kz.Topic(topic).Config()
+ if err != nil {
+ return err
+ }
+ for k, v := range cfg {
+ if c[k] != v {
+ return fmt.Errorf("expected %s=%s, but found %s", k, cfg[k], c[k])
+ }
+ }
+ return nil
+}
+
+// returns true if topic exists, false otherwise
+func hasTopic(kz kzoo, topic string) (bool, error) {
+ topics, err := kz.Topics()
+ if err != nil {
+ return false, err
+ }
+ for _, t := range topics {
+ if t.Name == topic {
+ return true, nil
+ }
+ }
+ return false, nil
+}
+
+// check that the number of partitions matches npar using the kazoo library
+func (m *topicManager) checkPartitions(topic string, npar int) error {
+ t := m.zk.Topic(topic)
+
+ partitions, err := t.Partitions()
+ if err != nil {
+ return fmt.Errorf("Error fetching partitions for topic %s: %v", topic, err)
+ }
+ if len(partitions) != npar {
+ return fmt.Errorf("Topic %s does not have %d partitions", topic, npar)
+ }
+ return nil
+}
+
+// check that the number of partitions matches npar using the sarama client
+func checkPartitions(client sarama.Client, topic string, npar int) error {
+ // check if topic has npar partitions
+ partitions, err := client.Partitions(topic)
+ if err != nil {
+ return fmt.Errorf("Error fetching partitions for topic %s: %v", topic, err)
+ }
+ if len(partitions) != npar {
+ return fmt.Errorf("Topic %s has %d partitions instead of %d", topic, len(partitions), npar)
+ }
+ return nil
+}
+
+func updateChroot(servers []string) (servs []string, chroot string, err error) {
+ // find chroot in server addresses
+ for _, server := range servers {
+ for strings.HasSuffix(server, "/") {
+ server = server[:len(server)-1]
+ }
+ splt := strings.Split(server, "/")
+ if len(splt) == 1 {
+ // no chroot in address
+ servs = append(servs, server)
+ continue
+ }
+ if len(splt) > 2 {
+ err = fmt.Errorf("Could not parse %s properly", server)
+ return
+ }
+ servs = append(servs, splt[0])
+ c := fmt.Sprintf("/%s", splt[1])
+ if chroot == "" {
+ chroot = c
+ } else if c != chroot {
+ err = fmt.Errorf("Multiple chroot set (%s != %s)", c, chroot)
+ return
+ }
+ }
+ return
+}
+
+//go:generate mockgen -package mock -destination mock/kazoo.go -source=topic_manager.go kzoo
+type kzoo interface {
+ Topic(topic string) *kazoo.Topic
+ Topics() (kazoo.TopicList, error)
+ CreateTopic(topic string, npar int, rep int, config map[string]string) error
+ Close() error
+}
diff --git a/vendor/github.com/lovoo/goka/logger/logger.go b/vendor/github.com/lovoo/goka/logger/logger.go
new file mode 100644
index 00000000..aa6ac7b4
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/logger/logger.go
@@ -0,0 +1,34 @@
+package logger
+
+import "log"
+
+var (
+ defaultLogger = &std{}
+)
+
+// Logger is the interface Goka and its subpackages use for logging.
+type Logger interface {
+ // Printf will be used for informational messages. These can be thought of
+ // as having an 'Info'-level in a structured logger.
+ Printf(string, ...interface{})
+ // Panicf will only be called on an unexpected programming error such as a type
+ // assertion which should never fail. Regular errors will be returned
+ // from the library.
+ Panicf(string, ...interface{})
+}
+
+// std bridges the logger calls to the standard library log.
+type std struct{}
+
+func (s *std) Printf(msg string, args ...interface{}) {
+ log.Printf(msg, args...)
+}
+
+func (s *std) Panicf(msg string, args ...interface{}) {
+ log.Panicf(msg, args...)
+}
+
+// Default returns the standard library logger
+func Default() Logger {
+ return defaultLogger
+}
diff --git a/vendor/github.com/lovoo/goka/logo.png b/vendor/github.com/lovoo/goka/logo.png
new file mode 100644
index 00000000..3b70feeb
Binary files /dev/null and b/vendor/github.com/lovoo/goka/logo.png differ
diff --git a/vendor/github.com/lovoo/goka/multierr/errgroup.go b/vendor/github.com/lovoo/goka/multierr/errgroup.go
new file mode 100644
index 00000000..211cec7e
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/multierr/errgroup.go
@@ -0,0 +1,32 @@
+package multierr
+
+import (
+ "context"
+
+ "golang.org/x/sync/errgroup"
+)
+
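+// ErrGroup wraps errgroup.Group and additionally collects all errors returned by the goroutines it runs.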
+type ErrGroup struct {
+ *errgroup.Group
+ err Errors
+}
+
+func NewErrGroup(ctx context.Context) (*ErrGroup, context.Context) {
+ g, ctx := errgroup.WithContext(ctx)
+ return &ErrGroup{Group: g}, ctx
+}
+
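+// Wait blocks until all goroutines started via Go have returned and returns the collected errors.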
+func (g *ErrGroup) Wait() *Errors {
+ _ = g.Group.Wait()
+ return &g.err
+}
+
+func (g *ErrGroup) Go(f func() error) {
+ g.Group.Go(func() error {
+ if err := f(); err != nil {
+ g.err.Collect(err)
+ return err
+ }
+ return nil
+ })
+}
diff --git a/vendor/github.com/lovoo/goka/multierr/errors.go b/vendor/github.com/lovoo/goka/multierr/errors.go
new file mode 100644
index 00000000..540cbe71
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/multierr/errors.go
@@ -0,0 +1,65 @@
+package multierr
+
+import (
+ "fmt"
+ "sync"
+)
+
+// Errors represents a list of errors triggered during the execution of a goka view/processor.
+// Normally, the first error leads to stopping the processor/view, but during shutdown, more errors
+// might occur.
+type Errors struct {
+ errs []error
+ m sync.Mutex
+}
+
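+// Collect appends err to the list of collected errors; nil errors are ignored.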
+func (e *Errors) Collect(err error) *Errors {
+ if err == nil {
+ return e
+ }
+ e.m.Lock()
+ e.errs = append(e.errs, err)
+ e.m.Unlock()
+ return e
+}
+
+func (e *Errors) Merge(o *Errors) *Errors {
+ if o == nil {
+ return e
+ }
+
+ // lock base
+ e.m.Lock()
+ defer e.m.Unlock()
+ // lock other
+ o.m.Lock()
+ defer o.m.Unlock()
+
+ e.errs = append(e.errs, o.errs...)
+ return e
+}
+
+func (e *Errors) HasErrors() bool {
+ return len(e.errs) > 0
+}
+
+func (e *Errors) Error() string {
+ if !e.HasErrors() {
+ return ""
+ }
+ if len(e.errs) == 1 {
+ return e.errs[0].Error()
+ }
+ str := "Errors:\n"
+ for _, err := range e.errs {
+ str += fmt.Sprintf("\t* %s\n", err.Error())
+ }
+ return str
+}
+
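+// NilOrError returns nil if no errors were collected, otherwise the Errors value itself.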
+func (e *Errors) NilOrError() error {
+ if e.HasErrors() {
+ return e
+ }
+ return nil
+}
diff --git a/vendor/github.com/lovoo/goka/once.go b/vendor/github.com/lovoo/goka/once.go
new file mode 100644
index 00000000..c8c4f2ad
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/once.go
@@ -0,0 +1,14 @@
+package goka
+
+import "sync"
+
+type once struct {
+ once sync.Once
+ err error
+}
+
+// Do runs f only once and always returns the same error.
+func (o *once) Do(f func() error) error {
+ o.once.Do(func() { o.err = f() })
+ return o.err
+}
diff --git a/vendor/github.com/lovoo/goka/options.go b/vendor/github.com/lovoo/goka/options.go
new file mode 100644
index 00000000..13663587
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/options.go
@@ -0,0 +1,456 @@
+package goka
+
+import (
+ "fmt"
+ "hash"
+ "hash/fnv"
+ "path/filepath"
+
+ "github.com/lovoo/goka/kafka"
+ "github.com/lovoo/goka/logger"
+ "github.com/lovoo/goka/storage"
+)
+
+// UpdateCallback is invoked upon arrival of a message for a table partition.
+// The partition storage shall be updated in the callback.
+type UpdateCallback func(s storage.Storage, partition int32, key string, value []byte) error
+
+// RebalanceCallback is invoked when the processor receives a new partition assignment.
+type RebalanceCallback func(a kafka.Assignment)
+
+///////////////////////////////////////////////////////////////////////////////
+// default values
+///////////////////////////////////////////////////////////////////////////////
+
+const (
+ defaultBaseStoragePath = "/tmp/goka"
+ defaultClientID = "goka"
+)
+
+// DefaultProcessorStoragePath is the default path where processor state
+// will be stored.
+func DefaultProcessorStoragePath(group Group) string {
+ return filepath.Join(defaultBaseStoragePath, "processor", string(group))
+}
+
+// DefaultViewStoragePath returns the default path where view state will be stored.
+func DefaultViewStoragePath() string {
+ return filepath.Join(defaultBaseStoragePath, "view")
+}
+
+// DefaultUpdate is the default callback used to update the local storage
+// from the table topic in Kafka. It is called for every message received
+// during recovery of processors and during the normal operation of views.
+// DefaultUpdate can be used in the function passed to WithUpdateCallback and
+// WithViewCallback.
+func DefaultUpdate(s storage.Storage, partition int32, key string, value []byte) error {
+ if value == nil {
+ return s.Delete(key)
+ }
+
+ return s.Set(key, value)
+}
+
+// DefaultRebalance is the default callback when a new partition assignment is received.
+// DefaultRebalance can be used in the function passed to WithRebalanceCallback.
+func DefaultRebalance(a kafka.Assignment) {}
+
+// DefaultHasher returns an FNV hasher builder to assign keys to partitions.
+func DefaultHasher() func() hash.Hash32 {
+ return func() hash.Hash32 {
+ return fnv.New32a()
+ }
+
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// processor options
+///////////////////////////////////////////////////////////////////////////////
+
+// ProcessorOption defines a configuration option to be used when creating a processor.
+type ProcessorOption func(*poptions, *GroupGraph)
+
+// processor options
+type poptions struct {
+ log logger.Logger
+ clientID string
+
+ updateCallback UpdateCallback
+ rebalanceCallback RebalanceCallback
+ partitionChannelSize int
+ hasher func() hash.Hash32
+ nilHandling NilHandling
+
+ builders struct {
+ storage storage.Builder
+ consumer kafka.ConsumerBuilder
+ producer kafka.ProducerBuilder
+ topicmgr kafka.TopicManagerBuilder
+ }
+}
+
+// WithUpdateCallback defines the callback called upon recovering a message
+// from the log.
+func WithUpdateCallback(cb UpdateCallback) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.updateCallback = cb
+ }
+}
+
+// WithClientID defines the client ID used to identify with Kafka.
+func WithClientID(clientID string) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.clientID = clientID
+ }
+}
+
+// WithStorageBuilder defines a builder for the storage of each partition.
+func WithStorageBuilder(sb storage.Builder) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.builders.storage = sb
+ }
+}
+
+// WithTopicManagerBuilder replaces the default topic manager builder.
+func WithTopicManagerBuilder(tmb kafka.TopicManagerBuilder) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.builders.topicmgr = tmb
+ }
+}
+
+// WithConsumerBuilder replaces the default consumer builder.
+func WithConsumerBuilder(cb kafka.ConsumerBuilder) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.builders.consumer = cb
+ }
+}
+
+// WithProducerBuilder replaces the default producer builder.
+func WithProducerBuilder(pb kafka.ProducerBuilder) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.builders.producer = pb
+ }
+}
+
+// WithPartitionChannelSize replaces the default partition channel size.
+// This is mostly used for testing by setting it to 0 to have synchronous behavior
+// of goka.
+func WithPartitionChannelSize(size int) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.partitionChannelSize = size
+ }
+}
+
+// WithLogger sets the logger the processor should use. By default, processors
+// use the standard library logger.
+func WithLogger(log logger.Logger) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.log = log
+ }
+}
+
+// WithHasher sets the hash function that assigns keys to partitions.
+func WithHasher(hasher func() hash.Hash32) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.hasher = hasher
+ }
+}
+
+// WithGroupGraphHook allows a function to obtain the group graph when a processor is started.
+func WithGroupGraphHook(hook func(gg *GroupGraph)) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ hook(gg)
+ }
+}
+
+// NilHandling defines how nil messages should be handled by the processor.
+type NilHandling int
+
+const (
+ // NilIgnore drops any message with nil value.
+ NilIgnore NilHandling = 0 + iota
+ // NilProcess passes the nil value to ProcessCallback.
+ NilProcess
+ // NilDecode passes the nil value to decoder before calling ProcessCallback.
+ NilDecode
+)
+
+// WithNilHandling configures how the processor should handle messages with nil
+// value. By default the processor ignores nil messages.
+func WithNilHandling(nh NilHandling) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.nilHandling = nh
+ }
+}
+
+// Tester interface to avoid import cycles when a processor needs to register to
+// the tester.
+type Tester interface {
+ StorageBuilder() storage.Builder
+ ConsumerBuilder() kafka.ConsumerBuilder
+ ProducerBuilder() kafka.ProducerBuilder
+ EmitterProducerBuilder() kafka.ProducerBuilder
+ TopicManagerBuilder() kafka.TopicManagerBuilder
+ RegisterGroupGraph(*GroupGraph)
+ RegisterEmitter(Stream, Codec)
+ RegisterView(Table, Codec)
+}
+
+// WithTester configures all external connections of a processor, i.e. storage,
+// consumer and producer.
+func WithTester(t Tester) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.builders.storage = t.StorageBuilder()
+ o.builders.consumer = t.ConsumerBuilder()
+ o.builders.producer = t.ProducerBuilder()
+ o.builders.topicmgr = t.TopicManagerBuilder()
+ o.partitionChannelSize = 0
+ t.RegisterGroupGraph(gg)
+ }
+}
+
+func (opt *poptions) applyOptions(gg *GroupGraph, opts ...ProcessorOption) error {
+ opt.clientID = defaultClientID
+ opt.log = logger.Default()
+ opt.hasher = DefaultHasher()
+
+ for _, o := range opts {
+ o(opt, gg)
+ }
+
+ // StorageBuilder should always be set as a default option in NewProcessor
+ if opt.builders.storage == nil {
+ return fmt.Errorf("StorageBuilder not set")
+ }
+ if opt.builders.consumer == nil {
+ opt.builders.consumer = kafka.DefaultConsumerBuilder
+ }
+ if opt.builders.producer == nil {
+ opt.builders.producer = kafka.DefaultProducerBuilder
+ }
+ if opt.builders.topicmgr == nil {
+ opt.builders.topicmgr = kafka.DefaultTopicManagerBuilder
+ }
+
+ return nil
+}
+
+// WithRebalanceCallback sets the callback for when a new partition assignment
+// is received. By default, this is an empty function.
+func WithRebalanceCallback(cb RebalanceCallback) ProcessorOption {
+ return func(o *poptions, gg *GroupGraph) {
+ o.rebalanceCallback = cb
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// view options
+///////////////////////////////////////////////////////////////////////////////
+
+// ViewOption defines a configuration option to be used when creating a view.
+type ViewOption func(*voptions, Table, Codec)
+
+type voptions struct {
+ log logger.Logger
+ clientID string
+ tableCodec Codec
+ updateCallback UpdateCallback
+ partitionChannelSize int
+ hasher func() hash.Hash32
+ restartable bool
+
+ builders struct {
+ storage storage.Builder
+ consumer kafka.ConsumerBuilder
+ topicmgr kafka.TopicManagerBuilder
+ }
+}
+
+// WithViewLogger sets the logger the view should use. By default, views
+// use the standard library logger.
+func WithViewLogger(log logger.Logger) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.log = log
+ }
+}
+
+// WithViewCallback defines the callback called upon recovering a message
+// from the log.
+func WithViewCallback(cb UpdateCallback) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.updateCallback = cb
+ }
+}
+
+// WithViewStorageBuilder defines a builder for the storage of each partition.
+func WithViewStorageBuilder(sb storage.Builder) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.builders.storage = sb
+ }
+}
+
+// WithViewConsumerBuilder replaces the default view consumer builder.
+func WithViewConsumerBuilder(cb kafka.ConsumerBuilder) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.builders.consumer = cb
+ }
+}
+
+// WithViewTopicManagerBuilder replaces the default topic manager.
+func WithViewTopicManagerBuilder(tmb kafka.TopicManagerBuilder) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.builders.topicmgr = tmb
+ }
+}
+
+// WithViewPartitionChannelSize replaces the default partition channel size.
+// This is mostly used for testing by setting it to 0 to have synchronous behavior
+// of goka.
+func WithViewPartitionChannelSize(size int) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.partitionChannelSize = size
+ }
+}
+
+// WithViewHasher sets the hash function that assigns keys to partitions.
+func WithViewHasher(hasher func() hash.Hash32) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.hasher = hasher
+ }
+}
+
+// WithViewClientID defines the client ID used to identify with Kafka.
+func WithViewClientID(clientID string) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.clientID = clientID
+ }
+}
+
+// WithViewRestartable defines that the view can be restarted, even when Run()
+// returns errors. If the view is restartable, the client must call Terminate()
+// to release all resources, i.e. close the local storage.
+func WithViewRestartable() ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.restartable = true
+ }
+}
+
+// WithViewTester configures all external connections of a view, i.e. storage,
+// consumer and topic manager.
+func WithViewTester(t Tester) ViewOption {
+ return func(o *voptions, table Table, codec Codec) {
+ o.builders.storage = t.StorageBuilder()
+ o.builders.consumer = t.ConsumerBuilder()
+ o.builders.topicmgr = t.TopicManagerBuilder()
+ o.partitionChannelSize = 0
+ t.RegisterView(table, codec)
+ }
+}
+
+func (opt *voptions) applyOptions(topic Table, codec Codec, opts ...ViewOption) error {
+ opt.clientID = defaultClientID
+ opt.log = logger.Default()
+ opt.hasher = DefaultHasher()
+
+ for _, o := range opts {
+ o(opt, topic, codec)
+ }
+
+ // StorageBuilder should always be set as a default option in NewView
+ if opt.builders.storage == nil {
+ return fmt.Errorf("StorageBuilder not set")
+ }
+ if opt.builders.consumer == nil {
+ opt.builders.consumer = kafka.DefaultConsumerBuilder
+ }
+ if opt.builders.topicmgr == nil {
+ opt.builders.topicmgr = kafka.DefaultTopicManagerBuilder
+ }
+
+ return nil
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// emitter options
+///////////////////////////////////////////////////////////////////////////////
+
+// EmitterOption defines a configuration option to be used when creating an
+// emitter.
+type EmitterOption func(*eoptions, Stream, Codec)
+
+// emitter options
+type eoptions struct {
+ log logger.Logger
+ clientID string
+
+ hasher func() hash.Hash32
+
+ builders struct {
+ topicmgr kafka.TopicManagerBuilder
+ producer kafka.ProducerBuilder
+ }
+}
+
+// WithEmitterLogger sets the logger the emitter should use. By default,
+// emitters use the standard library logger.
+func WithEmitterLogger(log logger.Logger) EmitterOption {
+ return func(o *eoptions, topic Stream, codec Codec) {
+ o.log = log
+ }
+}
+
+// WithEmitterClientID defines the client ID used to identify with Kafka.
+func WithEmitterClientID(clientID string) EmitterOption {
+ return func(o *eoptions, topic Stream, codec Codec) {
+ o.clientID = clientID
+ }
+}
+
+// WithEmitterTopicManagerBuilder replaces the default topic manager builder.
+func WithEmitterTopicManagerBuilder(tmb kafka.TopicManagerBuilder) EmitterOption {
+ return func(o *eoptions, topic Stream, codec Codec) {
+ o.builders.topicmgr = tmb
+ }
+}
+
+// WithEmitterProducerBuilder replaces the default producer builder.
+func WithEmitterProducerBuilder(pb kafka.ProducerBuilder) EmitterOption {
+ return func(o *eoptions, topic Stream, codec Codec) {
+ o.builders.producer = pb
+ }
+}
+
+// WithEmitterHasher sets the hash function that assigns keys to partitions.
+func WithEmitterHasher(hasher func() hash.Hash32) EmitterOption {
+ return func(o *eoptions, topic Stream, codec Codec) {
+ o.hasher = hasher
+ }
+}
+
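+// WithEmitterTester configures the emitter to use the tester's producer and
+// topic manager builders and registers the emitter with the tester.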
+func WithEmitterTester(t Tester) EmitterOption {
+ return func(o *eoptions, topic Stream, codec Codec) {
+ o.builders.producer = t.EmitterProducerBuilder()
+ o.builders.topicmgr = t.TopicManagerBuilder()
+ t.RegisterEmitter(topic, codec)
+ }
+}
+
+func (opt *eoptions) applyOptions(topic Stream, codec Codec, opts ...EmitterOption) error {
+ opt.clientID = defaultClientID
+ opt.log = logger.Default()
+ opt.hasher = DefaultHasher()
+
+ for _, o := range opts {
+ o(opt, topic, codec)
+ }
+
+ // config not set, use default one
+ if opt.builders.producer == nil {
+ opt.builders.producer = kafka.DefaultProducerBuilder
+ }
+ if opt.builders.topicmgr == nil {
+ opt.builders.topicmgr = kafka.DefaultTopicManagerBuilder
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/lovoo/goka/partition.go b/vendor/github.com/lovoo/goka/partition.go
new file mode 100644
index 00000000..7462c4b6
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/partition.go
@@ -0,0 +1,448 @@
+package goka
+
+import (
+ "context"
+ "fmt"
+ "log"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/lovoo/goka/kafka"
+ "github.com/lovoo/goka/logger"
+ "github.com/lovoo/goka/multierr"
+ "github.com/lovoo/goka/storage"
+
+ "github.com/Shopify/sarama"
+)
+
+const (
+ defaultPartitionChannelSize = 10
+ stallPeriod = 30 * time.Second
+ stalledTimeout = 2 * time.Minute
+)
+
+// partition represents one partition of a group table and handles the updates to
+// this table via UpdateCallback and ProcessCallback.
+//
+// partition can be started in two modes:
+// - catchup-mode: used by views, starts with startCatchup(), only UpdateCallback called
+// - processing-mode: used by processors, starts with start(),
+// recovers table with UpdateCallback
+// processes input streams with ProcessCallback
+//
+// The partition should never be called with a closed storage proxy.
+// - Before starting the partition in either way, the client must open the storage proxy.
+// - A partition may be restarted even if it returned errors. Before restarting
+// it, the client must call reinit().
+// - To release all resources, after stopping the partition, the client must
+// close the storage proxy.
+//
+type partition struct {
+ log logger.Logger
+ topic string
+
+ ch chan kafka.Event
+ st *storageProxy
+ proxy kafkaProxy
+ process processCallback
+
+ recoveredFlag int32
+ hwm int64
+ offset int64
+
+ recoveredOnce sync.Once
+
+ stats *PartitionStats
+ lastStats *PartitionStats
+ requestStats chan bool
+ responseStats chan *PartitionStats
+
+ droppedEvents []kafka.Event
+}
+
+type kafkaProxy interface {
+ Add(string, int64) error
+ Remove(string) error
+ AddGroup()
+ Stop()
+}
+
+type processCallback func(msg *message, st storage.Storage, wg *sync.WaitGroup, pstats *PartitionStats) (int, error)
+
+func newPartition(log logger.Logger, topic string, cb processCallback, st *storageProxy, proxy kafkaProxy, channelSize int) *partition {
+ return &partition{
+ log: log,
+ topic: topic,
+
+ ch: make(chan kafka.Event, channelSize),
+ st: st,
+ proxy: proxy,
+ process: cb,
+
+ stats: newPartitionStats(),
+ lastStats: newPartitionStats(),
+ requestStats: make(chan bool),
+ responseStats: make(chan *PartitionStats, 1),
+ }
+}
+
+// reinit reinitializes the partition to connect to Kafka and start its goroutine
+func (p *partition) reinit(proxy kafkaProxy) {
+ if proxy != nil {
+ p.proxy = proxy
+ }
+}
+
+// start loads the table partition up to HWM and then consumes streams
+func (p *partition) start(ctx context.Context) error {
+ defer p.proxy.Stop()
+ p.stats.Table.StartTime = time.Now()
+
+ // init events
+ p.droppedEvents = make([]kafka.Event, 0)
+
+ if p.st.Stateless() {
+ if err := p.markRecovered(false); err != nil {
+ return fmt.Errorf("error marking stateless partition as recovered: %v", err)
+ }
+ } else if err := p.recover(ctx); err != nil {
+ return err
+ }
+
+ // if stopped, just return
+ select {
+ case <-ctx.Done():
+ return nil
+ default:
+ }
+
+ return p.run(ctx)
+}
+
+// startCatchup continually loads the table partition
+func (p *partition) startCatchup(ctx context.Context) error {
+ defer p.proxy.Stop()
+ p.stats.Table.StartTime = time.Now()
+
+ return p.catchup(ctx)
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// processing
+///////////////////////////////////////////////////////////////////////////////
+func newMessage(ev *kafka.Message) *message {
+ return &message{
+ Topic: ev.Topic,
+ Partition: ev.Partition,
+ Offset: ev.Offset,
+ Timestamp: ev.Timestamp,
+ Data: ev.Value,
+ Key: ev.Key,
+ Header: ev.Header,
+ }
+}
+
+func (p *partition) run(ctx context.Context) error {
+ var wg sync.WaitGroup
+ p.proxy.AddGroup()
+
+ defer func() {
+ done := make(chan struct{})
+ go func() {
+ wg.Wait()
+ close(done)
+ }()
+
+ select {
+ case <-done:
+ case <-time.NewTimer(10 * time.Second).C:
+ log.Printf("partition shutdown timed out. Will stop waiting.")
+ }
+ }()
+
+ // recover the dropped events
+ for _, ev := range p.droppedEvents {
+ select {
+ case p.ch <- ev:
+ case <-ctx.Done():
+ return nil
+ }
+ }
+
+ for {
+ select {
+ case ev, isOpen := <-p.ch:
+ // channel already closed, ev will be nil
+ if !isOpen {
+ return nil
+ }
+ switch ev := ev.(type) {
+ case *kafka.Message:
+ if ev.Topic == p.topic {
+ return fmt.Errorf("received message from group table topic after recovery: %s", p.topic)
+ }
+
+ updates, err := p.process(newMessage(ev), p.st, &wg, p.stats)
+ if err != nil {
+ return fmt.Errorf("error processing message: %v", err)
+ }
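+ // advance the local offset by the number of table updates performed; hwm stays one ahead of offset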
+ p.offset += int64(updates)
+ p.hwm = p.offset + 1
+
+ // metrics
+ s := p.stats.Input[ev.Topic]
+ s.Count++
+ s.Bytes += len(ev.Value)
+ if !ev.Timestamp.IsZero() {
+ s.Delay = time.Since(ev.Timestamp)
+ }
+ p.stats.Input[ev.Topic] = s
+
+ case *kafka.NOP:
+ // don't do anything but also don't log.
+ case *kafka.EOF:
+ // if ev.Topic != p.topic {
+ // return fmt.Errorf("received EOF of topic that is not ours. This should not happend (ours=%s, received=%s)", p.topic, ev.Topic)
+ // }
+ default:
+ return fmt.Errorf("load: cannot handle %T = %v", ev, ev)
+ }
+
+ case <-p.requestStats:
+ p.lastStats = newPartitionStats().init(p.stats, p.offset, p.hwm)
+ select {
+ case p.responseStats <- p.lastStats:
+ case <-ctx.Done():
+ p.log.Printf("Partitioning exiting, context is cancelled")
+ return nil
+ }
+
+ case <-ctx.Done():
+ p.log.Printf("Partitioning exiting, context is cancelled (outer)")
+ return nil
+ }
+
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// loading storage
+///////////////////////////////////////////////////////////////////////////////
+
+func (p *partition) catchup(ctx context.Context) error {
+ return p.load(ctx, true)
+}
+
+func (p *partition) recover(ctx context.Context) error {
+ return p.load(ctx, false)
+}
+
+func (p *partition) recovered() bool {
+ return atomic.LoadInt32(&p.recoveredFlag) == 1
+}
+
+func (p *partition) load(ctx context.Context, catchup bool) (rerr error) {
+ // fetch local offset
+ if local, err := p.st.GetOffset(sarama.OffsetOldest); err != nil {
+ return fmt.Errorf("error reading local offset: %v", err)
+ } else if err = p.proxy.Add(p.topic, local); err != nil {
+ return err
+ }
+
+ defer func() {
+ var derr multierr.Errors
+ _ = derr.Collect(rerr)
+ if e := p.proxy.Remove(p.topic); e != nil {
+ _ = derr.Collect(e)
+ }
+ rerr = derr.NilOrError()
+ }()
+
+ stallTicker := time.NewTicker(stallPeriod)
+ defer stallTicker.Stop()
+
+ // reset stats after load
+ defer p.stats.reset()
+
+ var lastMessage time.Time
+ for {
+ select {
+ case ev, isOpen := <-p.ch:
+
+ // channel already closed, ev will be nil
+ if !isOpen {
+ return nil
+ }
+
+ switch ev := ev.(type) {
+ case *kafka.BOF:
+ p.hwm = ev.Hwm
+
+ if ev.Offset == ev.Hwm {
+ // nothing to recover
+ if err := p.markRecovered(false); err != nil {
+ return fmt.Errorf("error setting recovered: %v", err)
+ }
+ }
+
+ case *kafka.EOF:
+ p.offset = ev.Hwm - 1
+ p.hwm = ev.Hwm
+
+ if err := p.markRecovered(catchup); err != nil {
+ return fmt.Errorf("error setting recovered: %v", err)
+ }
+
+ if catchup {
+ continue
+ }
+ return nil
+
+ case *kafka.Message:
+ lastMessage = time.Now()
+ if ev.Topic != p.topic {
+ p.log.Printf("dropping message from topic = %s while loading", ev.Topic)
+ // saving the dropped messages from input stream
+ p.droppedEvents = append(p.droppedEvents, ev)
+ continue
+ }
+ if err := p.storeEvent(ev); err != nil {
+ return fmt.Errorf("load: error updating storage: %v", err)
+ }
+ p.offset = ev.Offset
+ if p.offset >= p.hwm-1 {
+ if err := p.markRecovered(catchup); err != nil {
+ return fmt.Errorf("error setting recovered: %v", err)
+ }
+ }
+
+ // update metrics
+ s := p.stats.Input[ev.Topic]
+ s.Count++
+ s.Bytes += len(ev.Value)
+ if !ev.Timestamp.IsZero() {
+ s.Delay = time.Since(ev.Timestamp)
+ }
+ p.stats.Input[ev.Topic] = s
+ p.stats.Table.Stalled = false
+
+ case *kafka.NOP:
+ // don't do anything
+
+ default:
+ return fmt.Errorf("load: cannot handle %T = %v", ev, ev)
+ }
+
+ case now := <-stallTicker.C:
+ // only set to stalled, if the last message was earlier
+ // than the stalled timeout
+ if now.Sub(lastMessage) > stalledTimeout {
+ p.stats.Table.Stalled = true
+ }
+
+ case <-p.requestStats:
+ p.lastStats = newPartitionStats().init(p.stats, p.offset, p.hwm)
+ select {
+ case p.responseStats <- p.lastStats:
+ case <-ctx.Done():
+ return nil
+ }
+
+ case <-ctx.Done():
+ return nil
+ }
+ }
+}
+
+func (p *partition) storeEvent(msg *kafka.Message) error {
+ err := p.st.Update(msg.Key, msg.Value)
+ if err != nil {
+ return fmt.Errorf("Error from the update callback while recovering from the log: %v", err)
+ }
+ err = p.st.SetOffset(msg.Offset)
+ if err != nil {
+ return fmt.Errorf("Error updating offset in local storage while recovering from the log: %v", err)
+ }
+ return nil
+}
+
+// mark storage as recovered
+func (p *partition) markRecovered(catchup bool) (err error) {
+ p.recoveredOnce.Do(func() {
+ p.lastStats = newPartitionStats().init(p.stats, p.offset, p.hwm)
+ p.lastStats.Table.Status = PartitionPreparing
+
+ var (
+ done = make(chan bool)
+ wg sync.WaitGroup
+ )
+ if catchup {
+ // if catching up (views), stop reading from topic before marking
+ // partition as recovered to avoid blocking other partitions when
+ // p.ch gets full
+ if err = p.proxy.Remove(p.topic); err != nil {
+ return
+ }
+
+ // drain events channel -- we'll fetch them again later
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ for {
+ select {
+ case <-p.ch:
+ case <-done:
+ return
+ }
+ }
+ }()
+ }
+
+ // mark storage as recovered -- this may take long
+ if err = p.st.MarkRecovered(); err != nil {
+ close(done)
+ return
+ }
+
+ if catchup {
+ close(done)
+ wg.Wait()
+ // start reading from topic again if in catchup mode
+ if err = p.proxy.Add(p.topic, p.hwm); err != nil {
+ return
+ }
+ }
+
+ // update stats
+ p.stats.Table.Status = PartitionRunning
+ p.stats.Table.RecoveryTime = time.Now()
+
+ atomic.StoreInt32(&p.recoveredFlag, 1)
+ })
+
+ // Be sure to mark partition as not stalled after EOF arrives, as
+ // this will not be written in the run-method
+ p.stats.Table.Stalled = false
+ return
+}
+
+func (p *partition) fetchStats(ctx context.Context) *PartitionStats {
+ timer := time.NewTimer(100 * time.Millisecond)
+ defer timer.Stop()
+
+ select {
+ case p.requestStats <- true:
+ case <-ctx.Done():
+ return newPartitionStats().init(p.lastStats, p.offset, p.hwm)
+ case <-timer.C:
+ return p.lastStats
+ }
+
+ select {
+ case s := <-p.responseStats:
+ return s
+ case <-ctx.Done():
+ return newPartitionStats().init(p.lastStats, p.offset, p.hwm)
+ }
+}
diff --git a/vendor/github.com/lovoo/goka/processor.go b/vendor/github.com/lovoo/goka/processor.go
new file mode 100644
index 00000000..b3f7dd66
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/processor.go
@@ -0,0 +1,904 @@
+package goka
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "runtime/debug"
+ "sync"
+ "time"
+
+ "github.com/lovoo/goka/kafka"
+ "github.com/lovoo/goka/logger"
+ "github.com/lovoo/goka/multierr"
+ "github.com/lovoo/goka/storage"
+)
+
+// Processor is a set of stateful callback functions that, on the arrival of
+// messages, modify the content of a table (the group table) and emit messages into other
+// topics. Messages as well as rows in the group table are key-value pairs.
+// A group is composed of multiple processor instances.
+type Processor struct {
+ opts *poptions
+ brokers []string
+
+ partitions map[int32]*partition
+ partitionViews map[int32]map[string]*partition
+ partitionCount int
+ views map[string]*View
+
+ graph *GroupGraph
+ m sync.RWMutex
+
+ consumer kafka.Consumer
+ producer kafka.Producer
+ asCh chan kafka.Assignment
+
+ errors *multierr.Errors
+ cancel func()
+ ctx context.Context
+}
+
+// message to be consumed
+type message struct {
+ Key string
+ Data []byte
+ Topic string
+ Partition int32
+ Offset int64
+ Timestamp time.Time
+ Header map[string][]byte
+}
+
+// ProcessCallback function is called for every message received by the
+// processor.
+type ProcessCallback func(ctx Context, msg interface{})
+
+// NewProcessor creates a processor instance in a group given the address of
+// Kafka brokers, the consumer group name, a list of subscriptions (topics,
+// codecs, and callbacks), and a series of options.
+func NewProcessor(brokers []string, gg *GroupGraph, options ...ProcessorOption) (*Processor, error) {
+ options = append(
+ // default options comes first
+ []ProcessorOption{
+ WithLogger(logger.Default()),
+ WithUpdateCallback(DefaultUpdate),
+ WithPartitionChannelSize(defaultPartitionChannelSize),
+ WithStorageBuilder(storage.DefaultBuilder(DefaultProcessorStoragePath(gg.Group()))),
+ WithRebalanceCallback(DefaultRebalance),
+ },
+
+ // user-defined options (may overwrite default ones)
+ options...,
+ )
+
+ if err := gg.Validate(); err != nil {
+ return nil, err
+ }
+
+ opts := new(poptions)
+ err := opts.applyOptions(gg, options...)
+ if err != nil {
+ return nil, fmt.Errorf(errApplyOptions, err)
+ }
+
+ npar, err := prepareTopics(brokers, gg, opts)
+ if err != nil {
+ return nil, err
+ }
+
+ // create views
+ views := make(map[string]*View)
+ for _, t := range gg.LookupTables() {
+ view, err := NewView(brokers, Table(t.Topic()), t.Codec(),
+ WithViewLogger(opts.log),
+ WithViewHasher(opts.hasher),
+ WithViewPartitionChannelSize(opts.partitionChannelSize),
+ WithViewClientID(opts.clientID),
+ WithViewTopicManagerBuilder(opts.builders.topicmgr),
+ WithViewStorageBuilder(opts.builders.storage),
+ WithViewConsumerBuilder(opts.builders.consumer),
+ )
+ if err != nil {
+ return nil, fmt.Errorf("error creating view: %v", err)
+ }
+ views[t.Topic()] = view
+ }
+
+ // combine things together
+ processor := &Processor{
+ opts: opts,
+ brokers: brokers,
+
+ partitions: make(map[int32]*partition),
+ partitionViews: make(map[int32]map[string]*partition),
+ partitionCount: npar,
+ views: views,
+
+ graph: gg,
+
+ asCh: make(chan kafka.Assignment, 1),
+ }
+
+ return processor, nil
+}
+
+func prepareTopics(brokers []string, gg *GroupGraph, opts *poptions) (npar int, err error) {
+ // create topic manager
+ tm, err := opts.builders.topicmgr(brokers)
+ if err != nil {
+ return 0, fmt.Errorf("Error creating topic manager: %v", err)
+ }
+ defer func() {
+ e := tm.Close()
+ if e != nil && err == nil {
+ err = fmt.Errorf("Error closing topic manager: %v", e)
+ }
+ }()
+
+ // check co-partitioned (external) topics have the same number of partitions
+ npar, err = ensureCopartitioned(tm, gg.copartitioned().Topics())
+ if err != nil {
+ return 0, err
+ }
+
+ // TODO(diogo): add output topics
+ if ls := gg.LoopStream(); ls != nil {
+ ensureStreams := []string{ls.Topic()}
+ for _, t := range ensureStreams {
+ if err = tm.EnsureStreamExists(t, npar); err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ if gt := gg.GroupTable(); gt != nil {
+ if err = tm.EnsureTableExists(gt.Topic(), npar); err != nil {
+ return 0, err
+ }
+ }
+
+ return
+}
+
+// returns the number of partitions the topics have, and an error if topics are
+// not copartitioned.
+func ensureCopartitioned(tm kafka.TopicManager, topics []string) (int, error) {
+ var npar int
+ for _, topic := range topics {
+ partitions, err := tm.Partitions(topic)
+ if err != nil {
+ return 0, fmt.Errorf("Error fetching partitions for topic %s: %v", topic, err)
+ }
+
+ // check assumption that partitions are gap-less
+ for i, p := range partitions {
+ if i != int(p) {
+ return 0, fmt.Errorf("Topic %s has partition gap: %v", topic, partitions)
+ }
+ }
+
+ if npar == 0 {
+ npar = len(partitions)
+ }
+ if len(partitions) != npar {
+ return 0, fmt.Errorf("Topic %s does not have %d partitions", topic, npar)
+ }
+ }
+ return npar, nil
+}
+
+// isStateless returns whether the processor is a stateless one.
+func (g *Processor) isStateless() bool {
+ return g.graph.GroupTable() == nil
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// value getter
+///////////////////////////////////////////////////////////////////////////////
+
+// Get returns a read-only copy of a value from the group table if the
+// respective partition is owned by the processor instance.
+// Get can be called by multiple goroutines concurrently.
+// Get can only be used with stateful processors (i.e. when the group table is
+// enabled) and after Recovered returns true.
+func (g *Processor) Get(key string) (interface{}, error) {
+ if g.isStateless() {
+ return nil, fmt.Errorf("can't get a value from stateless processor")
+ }
+
+ // find partition where key is located
+ s, err := g.find(key)
+ if err != nil {
+ return nil, err
+ }
+
+ // get key and return
+ val, err := s.Get(key)
+ if err != nil {
+ return nil, fmt.Errorf("error getting %s: %v", key, err)
+ } else if val == nil {
+ // if the key does not exist the return value is nil
+ return nil, nil
+ }
+
+ // since we don't know what the codec does, make copy of the object
+ data := make([]byte, len(val))
+ copy(data, val)
+ value, err := g.graph.GroupTable().Codec().Decode(data)
+ if err != nil {
+ return nil, fmt.Errorf("error decoding %s: %v", key, err)
+ }
+ return value, nil
+}
+
+func (g *Processor) find(key string) (storage.Storage, error) {
+ p, err := g.hash(key)
+ if err != nil {
+ return nil, err
+ }
+
+ if _, ok := g.partitions[p]; !ok {
+ return nil, fmt.Errorf("this processor does not contain partition %v", p)
+ }
+
+ return g.partitions[p].st, nil
+}
+
+func (g *Processor) hash(key string) (int32, error) {
+ // create a new hasher every time. An alternative would be to store the hasher
+ // and reset it every time (i.e. hasher.Reset()), but that would also require
+ // protecting access to the hasher with a mutex.
+ hasher := g.opts.hasher()
+
+ _, err := hasher.Write([]byte(key))
+ if err != nil {
+ return -1, err
+ }
+ hash := int32(hasher.Sum32())
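+ // Sum32 is unsigned, so the int32 conversion may yield a negative value; normalize before taking the modulo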
+ if hash < 0 {
+ hash = -hash
+ }
+ if g.partitionCount == 0 {
+ return 0, errors.New("can't hash with 0 partitions")
+ }
+ return hash % int32(g.partitionCount), nil
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// lifecycle
+///////////////////////////////////////////////////////////////////////////////
+
+// Run starts receiving messages from Kafka for the subscribed topics. For each
+// partition, a recovery will be attempted. Cancel the context to stop the
+// processor.
+func (g *Processor) Run(ctx context.Context) (rerr error) {
+ g.opts.log.Printf("Processor [%s]: starting", g.graph.Group())
+ defer g.opts.log.Printf("Processor [%s]: stopped", g.graph.Group())
+
+ // create errorgroup
+ ctx, g.cancel = context.WithCancel(ctx)
+ errg, ctx := multierr.NewErrGroup(ctx)
+ g.ctx = ctx
+ defer g.cancel()
+
+ // collect all errors before leaving
+ g.errors = new(multierr.Errors)
+ defer func() {
+ rerr = g.errors.Collect(rerr).NilOrError()
+ }()
+
+ // create kafka consumer
+ g.opts.log.Printf("Processor [%s]: creating consumer ", g.graph.Group())
+ consumer, err := g.opts.builders.consumer(g.brokers, string(g.graph.Group()), g.opts.clientID)
+ if err != nil {
+ return fmt.Errorf(errBuildConsumer, err)
+ }
+ g.consumer = consumer
+ defer func() {
+ g.opts.log.Printf("Processor [%s]: closing consumer", g.graph.Group())
+ if err = g.consumer.Close(); err != nil {
+ g.errors.Collect(fmt.Errorf("error closing consumer: %v", err))
+ }
+ g.opts.log.Printf("Processor [%s]: closing consumer done", g.graph.Group())
+ }()
+
+ // create kafka producer
+ g.opts.log.Printf("Processor [%s]: creating producer", g.graph.Group())
+ producer, err := g.opts.builders.producer(g.brokers, g.opts.clientID, g.opts.hasher)
+ if err != nil {
+ return fmt.Errorf(errBuildProducer, err)
+ }
+ g.producer = producer
+ defer func() {
+ g.opts.log.Printf("Processor [%s]: closing producer", g.graph.Group())
+ if err := g.producer.Close(); err != nil {
+ g.errors.Collect(fmt.Errorf("error closing producer: %v", err))
+ }
+ g.opts.log.Printf("Processor [%s]: closing producer done.", g.graph.Group())
+ }()
+
+ // start all views
+ for t, v := range g.views {
+ t, v := t, v
+ errg.Go(func() error {
+ if err := v.Run(ctx); err != nil {
+ return fmt.Errorf("error starting lookup table %s: %v", t, err)
+ }
+ return nil
+ })
+ defer func() { g.errors.Collect(v.Terminate()) }()
+ }
+
+ // subscribe for streams
+ topics := make(map[string]int64)
+ for _, e := range g.graph.InputStreams() {
+ topics[e.Topic()] = -1
+ }
+ if lt := g.graph.LoopStream(); lt != nil {
+ topics[lt.Topic()] = -1
+ }
+ if err := g.consumer.Subscribe(topics); err != nil {
+ g.cancel()
+ g.errors.Merge(errg.Wait())
+ return fmt.Errorf("error subscribing topics: %v", err)
+ }
+
+ // start processor dispatcher
+ errg.Go(func() error {
+ g.asCh <- kafka.Assignment{}
+ return g.waitAssignment(ctx)
+ })
+
+ // wait for goroutines to return
+ g.errors.Merge(errg.Wait())
+
+ // remove all partitions first
+ g.opts.log.Printf("Processor [%s]: removing partitions", g.graph.Group())
+ g.errors.Merge(g.removePartitions())
+
+ return
+}
+
+func (g *Processor) pushToPartition(ctx context.Context, part int32, ev kafka.Event) error {
+ p, ok := g.partitions[part]
+ if !ok {
+ return fmt.Errorf("dropping message, no partition yet: %v", ev)
+ }
+ select {
+ case p.ch <- ev:
+ case <-ctx.Done():
+ }
+ return nil
+}
+
+func (g *Processor) pushToPartitionView(ctx context.Context, topic string, part int32, ev kafka.Event) error {
+ views, ok := g.partitionViews[part]
+ if !ok {
+ return fmt.Errorf("dropping message, no partition yet: %v", ev)
+ }
+ p, ok := views[topic]
+ if !ok {
+ return fmt.Errorf("dropping message, no view yet: %v", ev)
+ }
+ select {
+ case p.ch <- ev:
+ case <-ctx.Done():
+ }
+ return nil
+}
+
+func (g *Processor) waitAssignment(ctx context.Context) error {
+ for {
+ select {
+ case <-ctx.Done():
+ g.opts.log.Printf("Processor [%s]: context cancelled, will stop the assignment loop", g.graph.Group())
+ return nil
+ case a := <-g.asCh:
+ if err := g.runAssignment(ctx, a); err != nil {
+ return err
+ }
+ }
+ }
+}
+
+func (g *Processor) runAssignment(ctx context.Context, a kafka.Assignment) error {
+ errs := new(multierr.Errors)
+ ctx, cancel := context.WithCancel(ctx)
+ errg, ctx := multierr.NewErrGroup(ctx)
+ defer cancel()
+
+ // create partitions based on the assignment
+ if err := g.rebalance(errg, ctx, a); err.HasErrors() {
+ return errs.Collect(err).NilOrError()
+ }
+
+ // start dispatcher
+ errg.Go(func() error {
+ err := g.dispatcher(ctx)
+ // cancel context even if dispatcher returned nil -- can only be a rebalance
+ cancel()
+ return err
+ })
+
+ // wait until dispatcher or partitions have returned
+ errs.Merge(errg.Wait())
+
+ // all partitions should have returned at this point, so clean up
+ errs.Merge(g.removePartitions())
+
+ return errs.NilOrError()
+}
+
+func (g *Processor) dispatcher(ctx context.Context) error {
+ g.opts.log.Printf("Processor: dispatcher started")
+ defer g.opts.log.Printf("Processor: dispatcher stopped")
+
+ for {
+ select {
+ case ev := <-g.consumer.Events():
+ switch ev := ev.(type) {
+ case *kafka.Assignment:
+ g.asCh <- *ev
+ return nil
+
+ case *kafka.Message:
+ var err error
+ if g.graph.joint(ev.Topic) {
+ err = g.pushToPartitionView(ctx, ev.Topic, ev.Partition, ev)
+ } else {
+ err = g.pushToPartition(ctx, ev.Partition, ev)
+ }
+ if err != nil {
+ return fmt.Errorf("error consuming message: %v", err)
+ }
+
+ case *kafka.BOF:
+ var err error
+ if g.graph.joint(ev.Topic) {
+ err = g.pushToPartitionView(ctx, ev.Topic, ev.Partition, ev)
+ } else {
+ err = g.pushToPartition(ctx, ev.Partition, ev)
+ }
+ if err != nil {
+ return fmt.Errorf("error consuming BOF: %v", err)
+ }
+
+ case *kafka.EOF:
+ var err error
+ if g.graph.joint(ev.Topic) {
+ err = g.pushToPartitionView(ctx, ev.Topic, ev.Partition, ev)
+ } else {
+ err = g.pushToPartition(ctx, ev.Partition, ev)
+ }
+ if err != nil {
+ return fmt.Errorf("error consuming EOF: %v", err)
+ }
+
+ case *kafka.NOP:
+ if g.graph.joint(ev.Topic) {
+ _ = g.pushToPartitionView(ctx, ev.Topic, ev.Partition, ev)
+ } else {
+ _ = g.pushToPartition(ctx, ev.Partition, ev)
+ }
+
+ case *kafka.Error:
+ return fmt.Errorf("kafka error: %v", ev.Err)
+
+ default:
+ return fmt.Errorf("processor: cannot handle %T = %v", ev, ev)
+ }
+ case <-ctx.Done():
+ return nil
+ }
+ }
+}
+
+func (g *Processor) fail(err error) {
+ g.opts.log.Printf("failing: %v", err)
+ g.errors.Collect(err)
+ g.cancel()
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// partition management (rebalance)
+///////////////////////////////////////////////////////////////////////////////
+
+func (g *Processor) newJoinStorage(topic string, id int32, update UpdateCallback) (*storageProxy, error) {
+ st, err := g.opts.builders.storage(topic, id)
+ if err != nil {
+ return nil, err
+ }
+ return &storageProxy{
+ Storage: st,
+ partition: id,
+ update: update,
+ }, nil
+}
+
+func (g *Processor) newStorage(topic string, id int32, update UpdateCallback) (*storageProxy, error) {
+ if g.isStateless() {
+ return &storageProxy{
+ Storage: storage.NewMemory(),
+ partition: id,
+ stateless: true,
+ }, nil
+ }
+
+ var (
+ err error
+ st storage.Storage
+ wg sync.WaitGroup
+ )
+ start := time.Now()
+ ticker := time.NewTicker(5 * time.Minute)
+ defer ticker.Stop()
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ st, err = g.opts.builders.storage(topic, id)
+ g.opts.log.Printf("finished building storage for topic %s", topic)
+ }()
+ go func() {
+ for range ticker.C {
+ g.opts.log.Printf("building storage for topic %s for %s ...", topic, time.Since(start).String())
+ }
+ }()
+ wg.Wait()
+ if err != nil {
+ return nil, err
+ }
+
+ return &storageProxy{
+ Storage: st,
+ partition: id,
+ update: update,
+ }, nil
+}
+
+func (g *Processor) createPartitionViews(errg *multierr.ErrGroup, ctx context.Context, id int32) error {
+ if _, has := g.partitionViews[id]; !has {
+ g.partitionViews[id] = make(map[string]*partition)
+ }
+
+ for _, t := range g.graph.JointTables() {
+ if _, has := g.partitions[id]; has {
+ continue
+ }
+ st, err := g.newJoinStorage(t.Topic(), id, DefaultUpdate)
+ if err != nil {
+ return fmt.Errorf("processor: error creating storage: %v", err)
+ }
+ p := newPartition(
+ g.opts.log,
+ t.Topic(),
+ nil, st, &proxy{id, g.consumer},
+ g.opts.partitionChannelSize,
+ )
+ g.partitionViews[id][t.Topic()] = p
+
+ errg.Go(func() (err error) {
+ defer func() {
+ if rerr := recover(); rerr != nil {
+ g.opts.log.Printf("partition view %s/%d: panic", p.topic, id)
+ err = fmt.Errorf("panic partition view %s/%d: %v\nstack:%v",
+ p.topic, id, rerr, string(debug.Stack()))
+ }
+ }()
+
+ if err = p.st.Open(); err != nil {
+ return fmt.Errorf("error opening storage %s/%d: %v", p.topic, id, err)
+ }
+ if err = p.startCatchup(ctx); err != nil {
+ return fmt.Errorf("error in partition view %s/%d: %v", p.topic, id, err)
+ }
+ g.opts.log.Printf("partition view %s/%d: exit", p.topic, id)
+ return nil
+ })
+ }
+ return nil
+}
+
+func (g *Processor) createPartition(errg *multierr.ErrGroup, ctx context.Context, id int32) error {
+ if _, has := g.partitions[id]; has {
+ return nil
+ }
+ // TODO(diogo) what name to use for stateless processors?
+ var groupTable string
+ if gt := g.graph.GroupTable(); gt != nil {
+ groupTable = gt.Topic()
+ }
+ st, err := g.newStorage(groupTable, id, g.opts.updateCallback)
+ if err != nil {
+ return fmt.Errorf("processor: error creating storage: %v", err)
+ }
+
+ // collect dependencies
+ var wait []func() bool
+ if pviews, has := g.partitionViews[id]; has {
+ for _, p := range pviews {
+ wait = append(wait, p.recovered)
+ }
+ }
+ for _, v := range g.views {
+ wait = append(wait, v.Recovered)
+ }
+
+ g.partitions[id] = newPartition(
+ g.opts.log,
+ groupTable,
+ g.process, st, &delayProxy{proxy: proxy{partition: id, consumer: g.consumer}, wait: wait},
+ g.opts.partitionChannelSize,
+ )
+ par := g.partitions[id]
+ errg.Go(func() (err error) {
+ defer func() {
+ if rerr := recover(); rerr != nil {
+ g.opts.log.Printf("partition %s/%d: panic", par.topic, id)
+ err = fmt.Errorf("partition %s/%d: panic: %v\nstack:%v",
+ par.topic, id, rerr, string(debug.Stack()))
+ }
+ }()
+ if err = par.st.Open(); err != nil {
+ return fmt.Errorf("error opening storage partition %d: %v", id, err)
+ }
+ if err = par.start(ctx); err != nil {
+ return fmt.Errorf("error in partition %d: %v", id, err)
+ }
+ g.opts.log.Printf("partition %s/%d: exit", par.topic, id)
+ return nil
+ })
+
+ return nil
+}
+
+func (g *Processor) rebalance(errg *multierr.ErrGroup, ctx context.Context, partitions kafka.Assignment) *multierr.Errors {
+ errs := new(multierr.Errors)
+ g.opts.log.Printf("Processor: rebalancing: %+v", partitions)
+
+ // callback the new partition assignment
+ g.opts.rebalanceCallback(partitions)
+
+ g.m.Lock()
+ defer g.m.Unlock()
+
+ for id := range partitions {
+ // create partition views
+ if err := g.createPartitionViews(errg, ctx, id); err != nil {
+ errs.Collect(err)
+ }
+ // create partition processor
+ if err := g.createPartition(errg, ctx, id); err != nil {
+ errs.Collect(err)
+ }
+ }
+ return errs
+}
+
+func (g *Processor) removePartitions() *multierr.Errors {
+ g.m.Lock()
+ defer g.m.Unlock()
+ errs := new(multierr.Errors)
+ for partition := range g.partitions {
+ errs.Merge(g.removePartition(partition))
+ }
+ return errs
+}
+
+func (g *Processor) removePartition(partition int32) *multierr.Errors {
+ errs := new(multierr.Errors)
+ g.opts.log.Printf("Removing partition %d", partition)
+
+ // remove partition processor
+ if err := g.partitions[partition].st.Close(); err != nil {
+ errs.Collect(fmt.Errorf("error closing storage partition %d: %v", partition, err))
+ }
+ delete(g.partitions, partition)
+
+ // remove partition views
+ pv, has := g.partitionViews[partition]
+ if !has {
+ return errs
+ }
+
+ for topic, p := range pv {
+ if err := p.st.Close(); err != nil {
+ errs.Collect(fmt.Errorf("error closing storage %s/%d: %v", topic, partition, err))
+ }
+ }
+ delete(g.partitionViews, partition)
+
+ return errs
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// context builder
+///////////////////////////////////////////////////////////////////////////////
+
+func (g *Processor) process(msg *message, st storage.Storage, wg *sync.WaitGroup, pstats *PartitionStats) (int, error) {
+ g.m.RLock()
+ views := g.partitionViews[msg.Partition]
+ g.m.RUnlock()
+
+ ctx := &cbContext{
+ ctx: g.ctx,
+ graph: g.graph,
+
+ pstats: pstats,
+ pviews: views,
+ views: g.views,
+ wg: wg,
+ msg: msg,
+ failer: func(err error) {
+ // only fail processor if context not already Done
+ select {
+ case <-g.ctx.Done():
+ return
+ default:
+ }
+ g.fail(err)
+ },
+ emitter: func(topic string, key string, value []byte) *kafka.Promise {
+ return g.producer.Emit(topic, key, value).Then(func(err error) {
+ if err != nil {
+ g.fail(err)
+ }
+ })
+ },
+ }
+ ctx.commit = func() {
+ // write group table offset to local storage
+ if ctx.counters.stores > 0 {
+ if offset, err := ctx.storage.GetOffset(0); err != nil {
+ ctx.failer(fmt.Errorf("error getting storage offset for %s/%d: %v",
+ g.graph.GroupTable().Topic(), msg.Partition, err))
+ return
+ } else if err = ctx.storage.SetOffset(offset + int64(ctx.counters.stores)); err != nil {
+ ctx.failer(fmt.Errorf("error writing storage offset for %s/%d: %v",
+ g.graph.GroupTable().Topic(), msg.Partition, err))
+ return
+ }
+ }
+
+ // mark upstream offset
+ if err := g.consumer.Commit(msg.Topic, msg.Partition, msg.Offset); err != nil {
+ g.fail(fmt.Errorf("error committing offsets of %s/%d: %v",
+ g.graph.GroupTable().Topic(), msg.Partition, err))
+ }
+ }
+
+ // use the storage if the processor is not stateless. Ignore otherwise
+ if !g.isStateless() {
+ ctx.storage = st
+ }
+
+ var (
+ m interface{}
+ err error
+ )
+
+ // decide whether to decode or ignore message
+ switch {
+ case msg.Data == nil && g.opts.nilHandling == NilIgnore:
+ // drop nil messages
+ return 0, nil
+ case msg.Data == nil && g.opts.nilHandling == NilProcess:
+ // process nil messages without decoding them
+ m = nil
+ default:
+ // get stream subscription
+ codec := g.graph.codec(msg.Topic)
+ if codec == nil {
+ return 0, fmt.Errorf("cannot handle topic %s", msg.Topic)
+ }
+
+ // decode message
+ m, err = codec.Decode(msg.Data)
+ if err != nil {
+ return 0, fmt.Errorf("error decoding message for key %s from %s/%d: %v", msg.Key, msg.Topic, msg.Partition, err)
+ }
+ }
+
+ cb := g.graph.callback(msg.Topic)
+ if cb == nil {
+ return 0, fmt.Errorf("error processing message for key %s from %s/%d: %v", msg.Key, msg.Topic, msg.Partition, err)
+ }
+
+ // start context and call the ProcessorCallback cb
+ ctx.start()
+ // call finish(err) if a panic occurs in cb
+ defer func() {
+ if r := recover(); r != nil {
+ ctx.finish(fmt.Errorf("panic: %v", r))
+ panic(r) // propagate panic up
+ }
+ }()
+ // now call cb
+ cb(ctx, m)
+ // if everything went fine, call finish(nil)
+ ctx.finish(nil)
+
+ return ctx.counters.stores, nil
+}
+
+// Recovered returns true when the processor has caught up with events from kafka.
+func (g *Processor) Recovered() bool {
+ for _, v := range g.views {
+ if !v.Recovered() {
+ return false
+ }
+ }
+
+ for _, part := range g.partitionViews {
+ for _, topicPart := range part {
+ if !topicPart.recovered() {
+ return false
+ }
+ }
+ }
+
+ for _, p := range g.partitions {
+ if !p.recovered() {
+ return false
+ }
+ }
+
+ return true
+}
+
+// Stats returns a set of performance metrics of the processor.
+func (g *Processor) Stats() *ProcessorStats {
+ return g.statsWithContext(context.Background())
+}
+
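+// statsWithContext fetches the stats of all group partitions, joined partitions
+// and lookup views concurrently and waits for all of them to complete.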
+func (g *Processor) statsWithContext(ctx context.Context) *ProcessorStats {
+ var (
+ m sync.Mutex
+ wg sync.WaitGroup
+ stats = newProcessorStats(len(g.partitions))
+ )
+
+ for i, p := range g.partitions {
+ wg.Add(1)
+ go func(pid int32, par *partition) {
+ s := par.fetchStats(ctx)
+ m.Lock()
+ stats.Group[pid] = s
+ m.Unlock()
+ wg.Done()
+ }(i, p)
+ }
+ for i, p := range g.partitionViews {
+ if _, ok := stats.Joined[i]; !ok {
+ stats.Joined[i] = make(map[string]*PartitionStats)
+ }
+ for t, tp := range p {
+ wg.Add(1)
+ go func(pid int32, topic string, par *partition) {
+ s := par.fetchStats(ctx)
+ m.Lock()
+ stats.Joined[pid][topic] = s
+ m.Unlock()
+ wg.Done()
+ }(i, t, tp)
+ }
+ }
+ for t, v := range g.views {
+ wg.Add(1)
+ go func(topic string, vi *View) {
+ s := vi.statsWithContext(ctx)
+ m.Lock()
+ stats.Lookup[topic] = s
+ m.Unlock()
+ wg.Done()
+ }(t, v)
+ }
+
+ wg.Wait()
+ return stats
+}
+
+// Graph returns the GroupGraph given at the creation of the processor.
+func (g *Processor) Graph() *GroupGraph {
+ return g.graph
+}
diff --git a/vendor/github.com/lovoo/goka/proxy.go b/vendor/github.com/lovoo/goka/proxy.go
new file mode 100644
index 00000000..48946ab0
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/proxy.go
@@ -0,0 +1,122 @@
+package goka
+
+import (
+ "fmt"
+ "sync"
+ "time"
+
+ "github.com/lovoo/goka/kafka"
+ "github.com/lovoo/goka/storage"
+)
+
+const (
+ delayProxyInterval = 1 * time.Second
+)
+
+type proxy struct {
+ partition int32
+ consumer kafka.Consumer
+}
+
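+// Add registers the proxy's partition of the given topic with the consumer,
+// starting consumption at the given offset.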
+func (p *proxy) Add(topic string, offset int64) error {
+ if err := p.consumer.AddPartition(topic, p.partition, offset); err != nil {
+ return fmt.Errorf("error adding %s/%d: %v", topic, p.partition, err)
+ }
+ return nil
+}
+
+func (p *proxy) Remove(topic string) error {
+ if err := p.consumer.RemovePartition(topic, p.partition); err != nil {
+ return fmt.Errorf("error removing %s/%d: %v", topic, p.partition, err)
+ }
+ return nil
+}
+
+func (p *proxy) AddGroup() {
+ p.consumer.AddGroupPartition(p.partition)
+}
+
+func (p *proxy) Stop() {}
+
+type delayProxy struct {
+ proxy
+ stop bool
+ m sync.Mutex
+ wait []func() bool
+}
+
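+// waitersDone reports whether all registered wait conditions are fulfilled.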
+func (p *delayProxy) waitersDone() bool {
+ for _, r := range p.wait {
+ if !r() {
+ return false
+ }
+ }
+ return true
+}
+
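+// AddGroup adds the group partition to the consumer. If wait conditions are
+// registered, the partition is added only after all of them report true,
+// checked once per delayProxyInterval, unless the proxy is stopped first.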
+func (p *delayProxy) AddGroup() {
+ if len(p.wait) == 0 {
+ p.consumer.AddGroupPartition(p.partition)
+ return
+ }
+
+ go func() {
+ ticker := time.NewTicker(delayProxyInterval)
+ defer ticker.Stop()
+ for range ticker.C {
+ p.m.Lock()
+ if p.stop {
+ p.m.Unlock()
+ return
+ }
+ if p.waitersDone() {
+ p.consumer.AddGroupPartition(p.partition)
+ p.m.Unlock()
+ return
+ }
+ p.m.Unlock()
+ }
+ }()
+}
+
+func (p *delayProxy) Stop() {
+ p.m.Lock()
+ p.stop = true
+ p.m.Unlock()
+}
+
+type storageProxy struct {
+ storage.Storage
+ partition int32
+ stateless bool
+ update UpdateCallback
+
+ openedOnce once
+ closedOnce once
+}
+
+func (s *storageProxy) Open() error {
+ if s == nil {
+ return nil
+ }
+ return s.openedOnce.Do(s.Storage.Open)
+}
+
+func (s *storageProxy) Close() error {
+ if s == nil {
+ return nil
+ }
+ return s.closedOnce.Do(s.Storage.Close)
+}
+
+func (s *storageProxy) Update(k string, v []byte) error {
+ return s.update(s.Storage, s.partition, k, v)
+}
+
+func (s *storageProxy) Stateless() bool {
+ return s.stateless
+}
+
+func (s *storageProxy) MarkRecovered() error {
+ return s.Storage.MarkRecovered()
+}
diff --git a/vendor/github.com/lovoo/goka/stats.go b/vendor/github.com/lovoo/goka/stats.go
new file mode 100644
index 00000000..8308bfd6
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/stats.go
@@ -0,0 +1,113 @@
+package goka
+
+import "time"
+
+// InputStats represents the number of messages and the number of bytes consumed
+// from a stream or table topic since the process started.
+type InputStats struct {
+ Count uint
+ Bytes int
+ Delay time.Duration
+}
+
+// OutputStats represents the number of messages and the number of bytes emitted
+// into a stream or table since the process started.
+type OutputStats struct {
+ Count uint
+ Bytes int
+}
+
+// PartitionStatus is the status of the partition of a table (group table or joined table).
+type PartitionStatus int
+
+const (
+ // PartitionRecovering indicates the partition is recovering and the storage
+ // is writing updates in bulk-mode (if the storage implementation supports it).
+ PartitionRecovering PartitionStatus = iota
+ // PartitionPreparing indicates the end of the bulk-mode. Depending on the storage
+ // implementation, the Preparing phase may take a while because the storage compacts its logs.
+ PartitionPreparing
+ // PartitionRunning indicates the partition is recovered and processing updates
+ // in normal operation.
+ PartitionRunning
+)
+
+// PartitionStats represents metrics and measurements of a partition.
+type PartitionStats struct {
+ Now time.Time
+
+ Table struct {
+ Status PartitionStatus
+ Stalled bool
+
+ Offset int64 // last offset processed or recovered
+ Hwm int64 // next offset to be written
+
+ StartTime time.Time
+ RecoveryTime time.Time
+ }
+ Input map[string]InputStats
+ Output map[string]OutputStats
+}
+
+func newPartitionStats() *PartitionStats {
+ return &PartitionStats{
+ Now: time.Now(),
+ Input: make(map[string]InputStats),
+ Output: make(map[string]OutputStats),
+ }
+}
+
+func (s *PartitionStats) init(o *PartitionStats, offset, hwm int64) *PartitionStats {
+ s.Table.Status = o.Table.Status
+ s.Table.Stalled = o.Table.Stalled
+ s.Table.StartTime = o.Table.StartTime
+ s.Table.RecoveryTime = o.Table.RecoveryTime
+ s.Table.Offset = offset
+ s.Table.Hwm = hwm
+ s.Now = time.Now()
+ for k, v := range o.Input {
+ s.Input[k] = v
+ }
+ for k, v := range o.Output {
+ s.Output[k] = v
+ }
+ return s
+}
+
+func (s *PartitionStats) reset() {
+ s.Input = make(map[string]InputStats)
+ s.Output = make(map[string]OutputStats)
+}
+
+// ViewStats represents the metrics of all partitions of a view.
+type ViewStats struct {
+ Partitions map[int32]*PartitionStats
+}
+
+func newViewStats() *ViewStats {
+ return &ViewStats{
+ Partitions: make(map[int32]*PartitionStats),
+ }
+}
+
+// ProcessorStats represents the metrics of all partitions of the processor,
+// including its group, joined tables and lookup tables.
+type ProcessorStats struct {
+ Group map[int32]*PartitionStats
+ Joined map[int32]map[string]*PartitionStats
+ Lookup map[string]*ViewStats
+}
+
+func newProcessorStats(partitions int) *ProcessorStats {
+ stats := &ProcessorStats{
+ Group: make(map[int32]*PartitionStats),
+ Joined: make(map[int32]map[string]*PartitionStats),
+ Lookup: make(map[string]*ViewStats),
+ }
+
+ for i := int32(0); i < int32(partitions); i++ {
+ stats.Joined[i] = make(map[string]*PartitionStats)
+ }
+ return stats
+}
diff --git a/vendor/github.com/lovoo/goka/storage/append.go b/vendor/github.com/lovoo/goka/storage/append.go
new file mode 100644
index 00000000..f76ffd21
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/storage/append.go
@@ -0,0 +1,89 @@
+package storage
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+)
+
+type file struct {
+ file io.WriteCloser
+ recovered bool
+
+ bytesWritten int64
+}
+
+// NewFile returns a new on-disk storage.
+func NewFile(path string, part int32) (Storage, error) {
+ if err := os.MkdirAll(path, os.ModePerm); err != nil {
+ return nil, fmt.Errorf("error creating storage directory: %v", err)
+ }
+
+ f, err := os.OpenFile(filepath.Join(path, fmt.Sprintf("part-%d", part)), os.O_CREATE|os.O_RDWR|os.O_APPEND, os.ModePerm)
+ if err != nil {
+ return nil, err
+ }
+
+ return &file{file: f}, nil
+}
+
+func (f *file) Recovered() bool {
+ return f.recovered
+}
+
+func (f *file) MarkRecovered() error {
+ f.recovered = true
+ return nil
+}
+
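+// Has always returns false; the append-only file storage does not support key lookups.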
+func (f *file) Has(key string) (bool, error) {
+ return false, nil
+}
+
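+// Get always returns nil; values cannot be read back from the append-only file.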
+func (f *file) Get(key string) ([]byte, error) {
+ return nil, nil
+}
+
+func (f *file) Set(key string, val []byte) error {
+ num, err := f.file.Write(val)
+ if err != nil {
+ return err
+ }
+
+ f.bytesWritten += int64(num)
+
+ if _, err := f.file.Write([]byte("\n")); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (f *file) Delete(string) error {
+ return nil
+}
+
+func (f *file) GetOffset(def int64) (int64, error) {
+ return def, nil
+}
+
+func (f *file) SetOffset(val int64) error {
+ return nil
+}
+
+func (f *file) Iterator() (Iterator, error) {
+ return new(NullIter), nil
+}
+
+func (f *file) IteratorWithRange(start, limit []byte) (Iterator, error) {
+ return new(NullIter), nil
+}
+
+func (f *file) Open() error {
+ return nil
+}
+
+func (f *file) Close() error {
+ return f.file.Close()
+}
diff --git a/vendor/github.com/lovoo/goka/storage/builders.go b/vendor/github.com/lovoo/goka/storage/builders.go
new file mode 100644
index 00000000..cd58ec38
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/storage/builders.go
@@ -0,0 +1,46 @@
+package storage
+
+import (
+ "fmt"
+ "path/filepath"
+
+ "github.com/syndtr/goleveldb/leveldb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+)
+
+// Builder creates a local storage (a persistent cache) for a topic
+// table. Builder creates one storage for each partition of the topic.
+type Builder func(topic string, partition int32) (Storage, error)
+
+// DefaultBuilder builds a LevelDB storage with default configuration.
+// The database will be stored in the given path.
+func DefaultBuilder(path string) Builder {
+ return func(topic string, partition int32) (Storage, error) {
+ fp := filepath.Join(path, fmt.Sprintf("%s.%d", topic, partition))
+ db, err := leveldb.OpenFile(fp, nil)
+ if err != nil {
+ return nil, fmt.Errorf("error opening leveldb: %v", err)
+ }
+ return New(db)
+ }
+}
+
+// BuilderWithOptions builds LevelDB storage with the given options and
+// in the given path.
+func BuilderWithOptions(path string, opts *opt.Options) Builder {
+ return func(topic string, partition int32) (Storage, error) {
+ fp := filepath.Join(path, fmt.Sprintf("%s.%d", topic, partition))
+ db, err := leveldb.OpenFile(fp, opts)
+ if err != nil {
+ return nil, fmt.Errorf("error opening leveldb: %v", err)
+ }
+ return New(db)
+ }
+}
+
+// MemoryBuilder builds in-memory storage.
+func MemoryBuilder() Builder {
+ return func(topic string, partition int32) (Storage, error) {
+ return NewMemory(), nil
+ }
+}
diff --git a/vendor/github.com/lovoo/goka/storage/iterator.go b/vendor/github.com/lovoo/goka/storage/iterator.go
new file mode 100644
index 00000000..e03dd938
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/storage/iterator.go
@@ -0,0 +1,55 @@
+package storage
+
+import (
+ "github.com/syndtr/goleveldb/leveldb"
+ ldbiter "github.com/syndtr/goleveldb/leveldb/iterator"
+)
+
+// iterator wraps a LevelDB iterator, skipping the offset key during iteration
+// and releasing the associated snapshot on Release.
+type iterator struct {
+ iter ldbiter.Iterator
+ snap *leveldb.Snapshot
+}
+
+// Next advances the iterator to the next key.
+func (i *iterator) Next() bool {
+ next := i.iter.Next()
+ if string(i.iter.Key()) == offsetKey {
+ next = i.iter.Next()
+ }
+
+ return next
+}
+
+// Err should be called after Next returns false to check for possible
+// iteration errors.
+func (i *iterator) Err() error {
+ return i.iter.Error()
+}
+
+// Key returns the current key.
+func (i *iterator) Key() []byte {
+ return i.iter.Key()
+}
+
+// Value returns the current value decoded by the codec of the storage.
+func (i *iterator) Value() ([]byte, error) {
+ data := i.iter.Value()
+ if data == nil {
+ return nil, nil
+ }
+
+ return data, nil
+}
+
+// Release releases the iterator and the associated snapshot. The iterator is
+// not usable anymore after calling Release.
+func (i *iterator) Release() {
+ i.iter.Release()
+ i.snap.Release()
+}
+
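+// Seek moves the iterator to the first key/value pair whose key is greater
+// than or equal to the given key. It returns whether such a pair exists.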
+func (i *iterator) Seek(key []byte) bool {
+ return i.iter.Seek(key)
+}
diff --git a/vendor/github.com/lovoo/goka/storage/memory.go b/vendor/github.com/lovoo/goka/storage/memory.go
new file mode 100644
index 00000000..40150fa6
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/storage/memory.go
@@ -0,0 +1,156 @@
+package storage
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type memiter struct {
+ current int
+ keys []string
+ storage map[string][]byte
+}
+
+func (i *memiter) exhausted() bool {
+ return len(i.keys) <= i.current
+}
+
+func (i *memiter) Next() bool {
+ i.current++
+ if string(i.Key()) == offsetKey {
+ i.current++
+ }
+ return !i.exhausted()
+}
+
+func (*memiter) Err() error {
+ return nil
+}
+
+func (i *memiter) Key() []byte {
+ if i.exhausted() {
+ return nil
+ }
+
+ return []byte(i.keys[i.current])
+}
+
+func (i *memiter) Value() ([]byte, error) {
+ if i.exhausted() {
+ return nil, nil
+ }
+
+ return i.storage[i.keys[i.current]], nil
+}
+
+func (i *memiter) Release() {
+ // mark the iterator as exhausted
+ i.current = len(i.keys)
+}
+
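+// Seek restricts the iterator to the keys that contain the given key as a
+// substring and resets the iteration to the beginning.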
+func (i *memiter) Seek(key []byte) bool {
+ seek := make(map[string][]byte)
+ keys := []string{}
+ for k, v := range i.storage {
+ if strings.Contains(k, string(key)) {
+ keys = append(keys, k)
+ seek[k] = v
+ }
+ }
+ i.current = -1
+ i.storage = seek
+ i.keys = keys
+ return !i.exhausted()
+}
+
+type memory struct {
+ storage map[string][]byte
+ offset *int64
+ recovered bool
+}
+
+// NewMemory returns a new in-memory storage.
+func NewMemory() Storage {
+ return &memory{
+ storage: make(map[string][]byte),
+ recovered: false,
+ }
+}
+
+func (m *memory) Has(key string) (bool, error) {
+ _, has := m.storage[key]
+ return has, nil
+}
+
+func (m *memory) Get(key string) ([]byte, error) {
+ value := m.storage[key]
+ return value, nil
+}
+
+func (m *memory) Set(key string, value []byte) error {
+ if value == nil {
+ return fmt.Errorf("cannot write nil value")
+ }
+ m.storage[key] = value
+ return nil
+}
+
+func (m *memory) Delete(key string) error {
+ delete(m.storage, key)
+ return nil
+}
+
+func (m *memory) Iterator() (Iterator, error) {
+ keys := make([]string, 0, len(m.storage))
+ for k := range m.storage {
+ keys = append(keys, k)
+ }
+
+ return &memiter{-1, keys, m.storage}, nil
+}
+
+func (m *memory) IteratorWithRange(start, limit []byte) (Iterator, error) {
+ keys := []string{} // use a slice because the number of matching keys is not known in advance
+ if len(limit) == 0 {
+ limit = util.BytesPrefix(start).Limit
+ }
+ for k := range m.storage {
+ if bytes.Compare([]byte(k), start) >= 0 && bytes.Compare([]byte(k), limit) <= 0 {
+ keys = append(keys, k)
+ }
+ }
+
+ return &memiter{-1, keys, m.storage}, nil
+}
+
+func (m *memory) MarkRecovered() error {
+ return nil
+}
+
+func (m *memory) Recovered() bool {
+ return m.recovered
+}
+
+func (m *memory) SetOffset(offset int64) error {
+ m.offset = &offset
+ return nil
+}
+
+func (m *memory) GetOffset(defValue int64) (int64, error) {
+ if m.offset == nil {
+ return defValue, nil
+ }
+
+ return *m.offset, nil
+}
+
+func (m *memory) Open() error {
+ return nil
+}
+
+func (m *memory) Close() error {
+ return nil
+}
diff --git a/vendor/github.com/lovoo/goka/storage/merge_iterator.go b/vendor/github.com/lovoo/goka/storage/merge_iterator.go
new file mode 100644
index 00000000..70b430ac
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/storage/merge_iterator.go
@@ -0,0 +1,143 @@
+package storage
+
+import (
+ "bytes"
+ "container/heap"
+)
+
+type iterHeap []Iterator
+
+func (h iterHeap) Len() int {
+ return len(h)
+}
+
+func (h iterHeap) Less(i, j int) bool {
+ return bytes.Compare(h[i].Key(), h[j].Key()) == -1
+}
+
+func (h iterHeap) Swap(i, j int) {
+ h[i], h[j] = h[j], h[i]
+}
+
+func (h *iterHeap) Push(x interface{}) {
+ *h = append(*h, x.(Iterator))
+}
+
+func (h *iterHeap) Pop() interface{} {
+ dref := *h
+ x := dref[len(dref)-1]
+ *h = dref[:len(dref)-1]
+ return x
+}
+
+type mergeIterator struct {
+ key []byte
+ value []byte
+ err error
+
+ heap iterHeap
+ iters []Iterator
+}
+
+// NewMultiIterator returns an Iterator that iterates over the given subiterators.
+// Iteration happens in lexicographical order given that the subiterators also
+// return values in order.
+func NewMultiIterator(iters []Iterator) Iterator {
+ miter := &mergeIterator{
+ iters: iters,
+ heap: make([]Iterator, 0, len(iters)),
+ }
+
+ miter.buildHeap(func(i Iterator) bool { return i.Next() })
+
+ return miter
+}
+
+func (m *mergeIterator) buildHeap(hasValue func(i Iterator) bool) {
+ m.heap = m.heap[:0]
+
+ for _, iter := range m.iters {
+ if !hasValue(iter) {
+ if m.err = iter.Err(); m.err != nil {
+ return
+ }
+
+ continue
+ }
+
+ heap.Push(&m.heap, iter)
+ }
+}
+
+// Key returns the current key. Caller should not keep references to the
+// buffer or modify its contents.
+func (m *mergeIterator) Key() []byte {
+ return m.key
+}
+
+// Value returns the current value. Caller should not keep references to the
+// buffer or modify its contents.
+func (m *mergeIterator) Value() ([]byte, error) {
+ return m.value, nil
+}
+
+// Seek moves the iterator to the beginning of a key-value pair sequence that
+// is greater than or equal to the given key. It returns whether at least one
+// such key-value pair exists.
+func (m *mergeIterator) Seek(key []byte) bool {
+ if m.err != nil {
+ return false
+ }
+
+ m.buildHeap(func(i Iterator) bool { return i.Seek(key) })
+
+ return m.err == nil && len(m.heap) > 0
+}
+
+// Next advances the iterator to the next key-value pair. If there is no next
+// pair, false is returned. The iteration error should then be checked by
+// calling Err().
+func (m *mergeIterator) Next() bool {
+ if m.err != nil || len(m.heap) == 0 {
+ return false
+ }
+
+ iter := heap.Pop(&m.heap).(Iterator)
+
+ // cache the values as the underlying iterator might reuse its buffers on
+ // call to Next
+ m.key = append(m.key[:0], iter.Key()...)
+ val, err := iter.Value()
+ if err != nil {
+ m.err = err
+ return false
+ }
+ m.value = append(m.value[:0], val...)
+
+ if iter.Next() {
+ heap.Push(&m.heap, iter)
+ } else if m.err = iter.Err(); m.err != nil {
+ return false
+ }
+
+ return true
+}
+
+// Err returns the possible iteration error.
+func (m *mergeIterator) Err() error {
+ return m.err
+}
+
+// Release frees up the resources used by the iterator. This will also release
+// the subiterators.
+func (m *mergeIterator) Release() {
+ for i := range m.iters {
+ m.iters[i].Release()
+ }
+
+ m.iters = nil
+ m.heap = nil
+ m.key = nil
+ m.value = nil
+ m.err = nil
+}
diff --git a/vendor/github.com/lovoo/goka/storage/null.go b/vendor/github.com/lovoo/goka/storage/null.go
new file mode 100644
index 00000000..c3d81b87
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/storage/null.go
@@ -0,0 +1,100 @@
+package storage
+
+// Null storage discards everything that it is given. This can be useful for
+// debugging.
+type Null struct {
+ recovered bool
+}
+
+// NewNull returns a new Null storage.
+func NewNull() Storage {
+ return new(Null)
+}
+
+// MarkRecovered does nothing.
+func (n *Null) MarkRecovered() error {
+ return nil
+}
+
+// Recovered returns whether the storage has recovered.
+func (n *Null) Recovered() bool {
+ return n.recovered
+}
+
+// Has returns false as in key not found.
+func (n *Null) Has(key string) (bool, error) {
+ return false, nil
+}
+
+// Get returns nil values.
+func (n *Null) Get(key string) ([]byte, error) {
+ return nil, nil
+}
+
+// Set does nothing and doesn't error.
+func (n *Null) Set(key string, val []byte) error {
+ return nil
+}
+
+// Delete does nothing and doesn't error.
+func (n *Null) Delete(string) error {
+ return nil
+}
+
+// GetOffset returns the default offset given to it.
+func (n *Null) GetOffset(def int64) (int64, error) {
+ return def, nil
+}
+
+// SetOffset does nothing and doesn't error.
+func (n *Null) SetOffset(val int64) error {
+ return nil
+}
+
+// Iterator returns an Iterator that is immediately exhausted.
+func (n *Null) Iterator() (Iterator, error) {
+ return new(NullIter), nil
+}
+
+// IteratorWithRange returns an Iterator that is immediately exhausted.
+func (n *Null) IteratorWithRange(start, limit []byte) (Iterator, error) {
+ return new(NullIter), nil
+}
+
+// Open does nothing and doesn't error.
+func (n *Null) Open() error {
+ return nil
+}
+
+// Close does nothing and doesn't error.
+func (n *Null) Close() error {
+ return nil
+}
+
+// NullIter is an iterator which is immediately exhausted.
+type NullIter struct{}
+
+// Next always returns false.
+func (ni *NullIter) Next() bool {
+ return false
+}
+
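+// Err always returns nil.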
+func (*NullIter) Err() error {
+ return nil
+}
+
+// Key always returns nil.
+func (ni *NullIter) Key() []byte {
+ return nil
+}
+
+// Value always returns a nil value and no error.
+func (ni *NullIter) Value() ([]byte, error) {
+ return nil, nil
+}
+
+// Release does nothing.
+func (ni *NullIter) Release() {}
+
+// Seek does nothing and always returns false.
+func (ni *NullIter) Seek(key []byte) bool { return false }
diff --git a/vendor/github.com/lovoo/goka/storage/storage.go b/vendor/github.com/lovoo/goka/storage/storage.go
new file mode 100644
index 00000000..107607c2
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/storage/storage.go
@@ -0,0 +1,204 @@
+package storage
+
+import (
+ "fmt"
+ "strconv"
+
+ "github.com/syndtr/goleveldb/leveldb"
+ ldbiter "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+const (
+ offsetKey = "__offset"
+)
+
+// Iterator provides iteration access to the stored values.
+type Iterator interface {
+ // Next advances the iterator to the next key.
+ Next() bool
+ // Err should be called after Next returns false to check for possible
+ // iteration errors.
+ Err() error
+ // Key gets the current key. If the iterator is exhausted, key will return
+ // nil.
+ Key() []byte
+ // Value gets the current value.
+ Value() ([]byte, error)
+ // Release releases the iterator. After release, the iterator is not usable
+ // anymore.
+ Release()
+ // Seek moves the iterator to the first key-value pair at or after the given
+ // key and returns whether such a pair exists.
+ Seek(key []byte) bool
+}
+
+// Storage abstracts the interface for a persistent local storage
+type Storage interface {
+ Has(string) (bool, error)
+ Get(string) ([]byte, error)
+ Set(string, []byte) error
+ Delete(string) error
+ SetOffset(value int64) error
+ GetOffset(defValue int64) (int64, error)
+ Iterator() (Iterator, error)
+ IteratorWithRange(start, limit []byte) (Iterator, error)
+ MarkRecovered() error
+ Recovered() bool
+ Open() error
+ Close() error
+}
+
+// store is the common interface between a transaction and db instance
+type store interface {
+ Has([]byte, *opt.ReadOptions) (bool, error)
+ Get([]byte, *opt.ReadOptions) ([]byte, error)
+ Put([]byte, []byte, *opt.WriteOptions) error
+ Delete([]byte, *opt.WriteOptions) error
+ NewIterator(*util.Range, *opt.ReadOptions) ldbiter.Iterator
+}
+
+type storage struct {
+ // store is the active store, either db or tx
+ store store
+ db *leveldb.DB
+ // tx is the transaction used for recovery
+ tx *leveldb.Transaction
+
+ currentOffset int64
+}
+
+// New creates a new Storage backed by LevelDB.
+func New(db *leveldb.DB) (Storage, error) {
+ tx, err := db.OpenTransaction()
+ if err != nil {
+ return nil, fmt.Errorf("error opening leveldb transaction: %v", err)
+ }
+
+ return &storage{
+ store: tx,
+ db: db,
+ tx: tx,
+ }, nil
+}
+
+// Iterator returns an iterator that traverses over a snapshot of the storage.
+func (s *storage) Iterator() (Iterator, error) {
+ snap, err := s.db.GetSnapshot()
+ if err != nil {
+ return nil, err
+ }
+
+ return &iterator{
+ iter: s.store.NewIterator(nil, nil),
+ snap: snap,
+ }, nil
+}
+
+// IteratorWithRange returns an iterator that traverses over a snapshot of the storage, restricted to the given key range.
+func (s *storage) IteratorWithRange(start, limit []byte) (Iterator, error) {
+ snap, err := s.db.GetSnapshot()
+ if err != nil {
+ return nil, err
+ }
+
+ if len(limit) > 0 {
+ return &iterator{
+ iter: s.store.NewIterator(&util.Range{Start: start, Limit: limit}, nil),
+ snap: snap,
+ }, nil
+ }
+ return &iterator{
+ iter: s.store.NewIterator(util.BytesPrefix(start), nil),
+ snap: snap,
+ }, nil
+
+}
+
+func (s *storage) Has(key string) (bool, error) {
+ return s.store.Has([]byte(key), nil)
+}
+
+func (s *storage) Get(key string) ([]byte, error) {
+ if has, err := s.store.Has([]byte(key), nil); err != nil {
+ return nil, fmt.Errorf("error checking for existence in leveldb (key %s): %v", key, err)
+ } else if !has {
+ return nil, nil
+ }
+
+ value, err := s.store.Get([]byte(key), nil)
+ if err == leveldb.ErrNotFound {
+ return nil, nil
+ } else if err != nil {
+ return nil, fmt.Errorf("error getting from leveldb (key %s): %v", key, err)
+ }
+ return value, nil
+}
+
+func (s *storage) GetOffset(defValue int64) (int64, error) {
+ data, err := s.Get(offsetKey)
+ if err != nil {
+ return 0, err
+ }
+
+ if data == nil {
+ return defValue, nil
+ }
+
+ value, err := strconv.ParseInt(string(data), 10, 64)
+ if err != nil {
+ return 0, fmt.Errorf("error decoding offset: %v", err)
+ }
+
+ return value, nil
+}
+
+func (s *storage) Set(key string, value []byte) error {
+ if err := s.store.Put([]byte(key), value, nil); err != nil {
+ return fmt.Errorf("error setting to leveldb (key %s): %v", key, err)
+ }
+ return nil
+}
+
+func (s *storage) SetOffset(offset int64) error {
+ if offset > s.currentOffset {
+ s.currentOffset = offset
+ }
+
+ return s.Set(offsetKey, []byte(strconv.FormatInt(offset, 10)))
+}
+
+func (s *storage) Delete(key string) error {
+ if err := s.store.Delete([]byte(key), nil); err != nil {
+ return fmt.Errorf("error deleting from leveldb (key %s): %v", key, err)
+ }
+
+ return nil
+}
+
+func (s *storage) MarkRecovered() error {
+ if s.store == s.db {
+ return nil
+ }
+
+ s.store = s.db
+ return s.tx.Commit()
+}
+
+func (s *storage) Recovered() bool {
+ return s.store == s.db
+}
+
+func (s *storage) Open() error {
+ return nil
+}
+
+func (s *storage) Close() error {
+ if s.store == s.tx {
+ if err := s.tx.Commit(); err != nil {
+ return fmt.Errorf("error closing transaction: %v", err)
+ }
+ }
+
+ return s.db.Close()
+}
diff --git a/vendor/github.com/lovoo/goka/view.go b/vendor/github.com/lovoo/goka/view.go
new file mode 100644
index 00000000..f4d9e892
--- /dev/null
+++ b/vendor/github.com/lovoo/goka/view.go
@@ -0,0 +1,408 @@
+package goka
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "sync"
+
+ "github.com/lovoo/goka/kafka"
+ "github.com/lovoo/goka/logger"
+ "github.com/lovoo/goka/multierr"
+ "github.com/lovoo/goka/storage"
+)
+
+// Getter functions return a value for a key or an error. If no value exists for the key, nil is returned without errors.
+type Getter func(string) (interface{}, error)
+
+// View is a materialized (i.e. persistent) cache of a group table.
+type View struct {
+ brokers []string
+ topic string
+ opts *voptions
+ partitions []*partition
+ consumer kafka.Consumer
+ terminated bool
+}
+
+// NewView creates a new View object from a group.
+func NewView(brokers []string, topic Table, codec Codec, options ...ViewOption) (*View, error) {
+ options = append(
+ // default options come first
+ []ViewOption{
+ WithViewLogger(logger.Default()),
+ WithViewCallback(DefaultUpdate),
+ WithViewPartitionChannelSize(defaultPartitionChannelSize),
+ WithViewStorageBuilder(storage.DefaultBuilder(DefaultViewStoragePath())),
+ },
+
+ // then the user passed options
+ options...,
+ )
+
+ // figure out how many partitions the group has
+ opts := new(voptions)
+ err := opts.applyOptions(topic, codec, options...)
+ if err != nil {
+ return nil, fmt.Errorf("Error applying user-defined options: %v", err)
+ }
+
+ opts.tableCodec = codec
+
+ v := &View{
+ brokers: brokers,
+ topic: string(topic),
+ opts: opts,
+ }
+
+ if err = v.createPartitions(brokers); err != nil {
+ return nil, err
+ }
+
+ return v, err
+}
+
+func (v *View) createPartitions(brokers []string) (rerr error) {
+ tm, err := v.opts.builders.topicmgr(brokers)
+ if err != nil {
+ return fmt.Errorf("Error creating topic manager: %v", err)
+ }
+ defer func() {
+ e := tm.Close()
+ if e != nil && rerr == nil {
+ rerr = fmt.Errorf("Error closing topic manager: %v", e)
+ }
+ }()
+
+ partitions, err := tm.Partitions(v.topic)
+ if err != nil {
+ return fmt.Errorf("Error getting partitions for topic %s: %v", v.topic, err)
+ }
+
+ // check assumption that partitions are gap-less
+ for i, p := range partitions {
+ if i != int(p) {
+ return fmt.Errorf("Partition numbers are not sequential for topic %s", v.topic)
+ }
+ }
+
+ v.opts.log.Printf("Table %s has %d partitions", v.topic, len(partitions))
+ for _, p := range partitions {
+ st, err := v.opts.builders.storage(v.topic, p)
+ if err != nil {
+ // TODO(diogo): gracefully terminate all partitions
+ return fmt.Errorf("Error creating local storage for partition %d: %v", p, err)
+ }
+
+ po := newPartition(v.opts.log, v.topic, nil,
+ &storageProxy{Storage: st, partition: p, update: v.opts.updateCallback},
+ &proxy{p, nil},
+ v.opts.partitionChannelSize,
+ )
+ v.partitions = append(v.partitions, po)
+ }
+
+ return nil
+}
+
+// reinit (re)initializes the view and its partitions to connect to Kafka
+func (v *View) reinit() error {
+ if v.terminated {
+ return fmt.Errorf("view: cannot reinitialize terminated view")
+ }
+
+ consumer, err := v.opts.builders.consumer(v.brokers, "goka-view", v.opts.clientID)
+ if err != nil {
+ return fmt.Errorf("view: cannot create Kafka consumer: %v", err)
+ }
+ v.consumer = consumer
+
+ for i, p := range v.partitions {
+ p.reinit(&proxy{int32(i), v.consumer})
+ }
+ return nil
+}
+
+// Run starts consuming the view's topic.
+func (v *View) Run(ctx context.Context) error {
+ v.opts.log.Printf("view [%s]: starting", v.Topic())
+ defer v.opts.log.Printf("view [%s]: stopped", v.Topic())
+
+ if err := v.reinit(); err != nil {
+ return err
+ }
+
+ errg, ctx := multierr.NewErrGroup(ctx)
+ errg.Go(func() error { return v.run(ctx) })
+
+ for id, p := range v.partitions {
+ pid, par := int32(id), p
+ errg.Go(func() error {
+ v.opts.log.Printf("view [%s]: partition %d started", v.Topic(), pid)
+ defer v.opts.log.Printf("view [%s]: partition %d stopped", v.Topic(), pid)
+ if err := par.st.Open(); err != nil {
+ return fmt.Errorf("view [%s]: error opening storage partition %d: %v", v.Topic(), pid, err)
+ }
+ if err := par.startCatchup(ctx); err != nil {
+ return fmt.Errorf("view [%s]: error running partition %d: %v", v.Topic(), pid, err)
+ }
+ return nil
+ })
+ }
+
+ // wait for partition goroutines and shutdown
+ errs := errg.Wait()
+
+ v.opts.log.Printf("view [%s]: closing consumer", v.Topic())
+ if err := v.consumer.Close(); err != nil {
+ _ = errs.Collect(fmt.Errorf("view [%s]: failed closing consumer: %v", v.Topic(), err))
+ }
+
+ if !v.opts.restartable {
+ v.terminated = true
+ errs = errs.Merge(v.close())
+ }
+
+ return errs.NilOrError()
+}
+
+// close closes all storage partitions
+func (v *View) close() *multierr.Errors {
+ errs := new(multierr.Errors)
+ for _, p := range v.partitions {
+ _ = errs.Collect(p.st.Close())
+ }
+ v.partitions = nil
+ return errs
+}
+
+// Terminate closes storage partitions. It must be called only if the view is
+// restartable (see WithViewRestartable() option). Once Terminate() is called,
+// the view cannot be restarted anymore.
+func (v *View) Terminate() error {
+ if !v.opts.restartable {
+ return nil
+ }
+ v.opts.log.Printf("View: closing")
+
+ // do not allow any reinitialization
+ if v.terminated {
+ return nil
+ }
+ v.terminated = true
+
+ if v.opts.restartable {
+ return v.close().NilOrError()
+ }
+ return nil
+}
+
+func (v *View) hash(key string) (int32, error) {
+ // create a new hasher every time. An alternative would be to store the hasher in
+ // the view and reset it on every call (i.e., hasher.Reset()). But that would
+ // also require us to protect access to the hasher with a mutex.
+ hasher := v.opts.hasher()
+
+ _, err := hasher.Write([]byte(key))
+ if err != nil {
+ return -1, err
+ }
+ hash := int32(hasher.Sum32())
+ if hash < 0 {
+ hash = -hash
+ }
+ if len(v.partitions) == 0 {
+ return 0, errors.New("no partitions found")
+ }
+ return hash % int32(len(v.partitions)), nil
+}
+
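+// find returns the storage of the partition that holds the given key.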
+func (v *View) find(key string) (storage.Storage, error) {
+ h, err := v.hash(key)
+ if err != nil {
+ return nil, err
+ }
+ return v.partitions[h].st, nil
+}
+
+// Topic returns the view's topic
+func (v *View) Topic() string {
+ return v.topic
+}
+
+// Get returns the value for the key in the view if it exists, nil otherwise.
+// Get can be called by multiple goroutines concurrently.
+// Get can only be called after Recovered returns true.
+func (v *View) Get(key string) (interface{}, error) {
+ // find partition where key is located
+ s, err := v.find(key)
+ if err != nil {
+ return nil, err
+ }
+
+ // get key and return
+ data, err := s.Get(key)
+ if err != nil {
+ return nil, fmt.Errorf("error getting value (key %s): %v", key, err)
+ } else if data == nil {
+ return nil, nil
+ }
+
+ // decode value
+ value, err := v.opts.tableCodec.Decode(data)
+ if err != nil {
+ return nil, fmt.Errorf("error decoding value (key %s): %v", key, err)
+ }
+
+ // if the key does not exist the return value is nil
+ return value, nil
+}
+
+// Has checks whether a value for passed key exists in the view.
+func (v *View) Has(key string) (bool, error) {
+ // find partition where key is located
+ s, err := v.find(key)
+ if err != nil {
+ return false, err
+ }
+
+ return s.Has(key)
+}
+
+// Iterator returns an iterator that iterates over the state of the View.
+func (v *View) Iterator() (Iterator, error) {
+ iters := make([]storage.Iterator, 0, len(v.partitions))
+ for i := range v.partitions {
+ iter, err := v.partitions[i].st.Iterator()
+ if err != nil {
+ // release already opened iterators
+ for i := range iters {
+ iters[i].Release()
+ }
+
+ return nil, fmt.Errorf("error opening partition iterator: %v", err)
+ }
+
+ iters = append(iters, iter)
+ }
+
+ return &iterator{
+ iter: storage.NewMultiIterator(iters),
+ codec: v.opts.tableCodec,
+ }, nil
+}
+
+// IteratorWithRange returns an iterator that iterates over the state of the View, restricted to the given key range.
+func (v *View) IteratorWithRange(start, limit string) (Iterator, error) {
+ iters := make([]storage.Iterator, 0, len(v.partitions))
+ for i := range v.partitions {
+ iter, err := v.partitions[i].st.IteratorWithRange([]byte(start), []byte(limit))
+ if err != nil {
+ // release already opened iterators
+ for i := range iters {
+ iters[i].Release()
+ }
+
+ return nil, fmt.Errorf("error opening partition iterator: %v", err)
+ }
+
+ iters = append(iters, iter)
+ }
+
+ return &iterator{
+ iter: storage.NewMultiIterator(iters),
+ codec: v.opts.tableCodec,
+ }, nil
+}
+
+// Evict removes the given key only from the local cache. In order to delete a
+// key from Kafka and other Views, context.Delete should be used on a Processor.
+func (v *View) Evict(key string) error {
+ s, err := v.find(key)
+ if err != nil {
+ return err
+ }
+
+ return s.Delete(key)
+}
+
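+// run dispatches consumer events to the corresponding partition channels until
+// the context is cancelled or the consumer reports an error.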
+func (v *View) run(ctx context.Context) error {
+ for {
+ select {
+ case ev := <-v.consumer.Events():
+ switch ev := ev.(type) {
+ case *kafka.Message:
+ partition := v.partitions[int(ev.Partition)]
+ select {
+ case partition.ch <- ev:
+ case <-ctx.Done():
+ return nil
+ }
+ case *kafka.BOF:
+ partition := v.partitions[int(ev.Partition)]
+ select {
+ case partition.ch <- ev:
+ case <-ctx.Done():
+ return nil
+ }
+ case *kafka.EOF:
+ partition := v.partitions[int(ev.Partition)]
+ select {
+ case partition.ch <- ev:
+ case <-ctx.Done():
+ return nil
+ }
+ case *kafka.NOP:
+ partition := v.partitions[int(ev.Partition)]
+ select {
+ case partition.ch <- ev:
+ case <-ctx.Done():
+ return nil
+ }
+ case *kafka.Error:
+ return fmt.Errorf("view: error from kafka consumer: %v", ev)
+ default:
+ return fmt.Errorf("view: cannot handle %T = %v", ev, ev)
+ }
+ case <-ctx.Done():
+ return nil
+ }
+ }
+}
+
+// Recovered returns true when the view has caught up with events from kafka.
+func (v *View) Recovered() bool {
+ for _, p := range v.partitions {
+ if !p.recovered() {
+ return false
+ }
+ }
+
+ return true
+}
+
+// Stats returns a set of performance metrics of the view.
+func (v *View) Stats() *ViewStats {
+ return v.statsWithContext(context.Background())
+}
+
+func (v *View) statsWithContext(ctx context.Context) *ViewStats {
+ var (
+ m sync.Mutex
+ wg sync.WaitGroup
+ stats = newViewStats()
+ )
+
+ wg.Add(len(v.partitions))
+ for i, p := range v.partitions {
+ go func(pid int32, par *partition) {
+ s := par.fetchStats(ctx)
+ m.Lock()
+ stats.Partitions[pid] = s
+ m.Unlock()
+ wg.Done()
+ }(int32(i), p)
+ }
+ wg.Wait()
+ return stats
+}
diff --git a/vendor/github.com/pkg/errors/.gitignore b/vendor/github.com/pkg/errors/.gitignore
new file mode 100644
index 00000000..daf913b1
--- /dev/null
+++ b/vendor/github.com/pkg/errors/.gitignore
@@ -0,0 +1,24 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
diff --git a/vendor/github.com/pkg/errors/.travis.yml b/vendor/github.com/pkg/errors/.travis.yml
new file mode 100644
index 00000000..9159de03
--- /dev/null
+++ b/vendor/github.com/pkg/errors/.travis.yml
@@ -0,0 +1,10 @@
+language: go
+go_import_path: github.com/pkg/errors
+go:
+ - 1.11.x
+ - 1.12.x
+ - 1.13.x
+ - tip
+
+script:
+ - make check
diff --git a/vendor/github.com/pkg/errors/LICENSE b/vendor/github.com/pkg/errors/LICENSE
new file mode 100644
index 00000000..835ba3e7
--- /dev/null
+++ b/vendor/github.com/pkg/errors/LICENSE
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Dave Cheney
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/pkg/errors/Makefile b/vendor/github.com/pkg/errors/Makefile
new file mode 100644
index 00000000..ce9d7cde
--- /dev/null
+++ b/vendor/github.com/pkg/errors/Makefile
@@ -0,0 +1,44 @@
+PKGS := github.com/pkg/errors
+SRCDIRS := $(shell go list -f '{{.Dir}}' $(PKGS))
+GO := go
+
+check: test vet gofmt misspell unconvert staticcheck ineffassign unparam
+
+test:
+ $(GO) test $(PKGS)
+
+vet: | test
+ $(GO) vet $(PKGS)
+
+staticcheck:
+ $(GO) get honnef.co/go/tools/cmd/staticcheck
+ staticcheck -checks all $(PKGS)
+
+misspell:
+ $(GO) get github.com/client9/misspell/cmd/misspell
+ misspell \
+ -locale GB \
+ -error \
+ *.md *.go
+
+unconvert:
+ $(GO) get github.com/mdempsky/unconvert
+ unconvert -v $(PKGS)
+
+ineffassign:
+ $(GO) get github.com/gordonklaus/ineffassign
+ find $(SRCDIRS) -name '*.go' | xargs ineffassign
+
+pedantic: check errcheck
+
+unparam:
+ $(GO) get mvdan.cc/unparam
+ unparam ./...
+
+errcheck:
+ $(GO) get github.com/kisielk/errcheck
+ errcheck $(PKGS)
+
+gofmt:
+ @echo Checking code is gofmted
+ @test -z "$(shell gofmt -s -l -d -e $(SRCDIRS) | tee /dev/stderr)"
diff --git a/vendor/github.com/pkg/errors/README.md b/vendor/github.com/pkg/errors/README.md
new file mode 100644
index 00000000..54dfdcb1
--- /dev/null
+++ b/vendor/github.com/pkg/errors/README.md
@@ -0,0 +1,59 @@
+# errors [![Travis-CI](https://travis-ci.org/pkg/errors.svg)](https://travis-ci.org/pkg/errors) [![AppVeyor](https://ci.appveyor.com/api/projects/status/b98mptawhudj53ep/branch/master?svg=true)](https://ci.appveyor.com/project/davecheney/errors/branch/master) [![GoDoc](https://godoc.org/github.com/pkg/errors?status.svg)](http://godoc.org/github.com/pkg/errors) [![Report card](https://goreportcard.com/badge/github.com/pkg/errors)](https://goreportcard.com/report/github.com/pkg/errors) [![Sourcegraph](https://sourcegraph.com/github.com/pkg/errors/-/badge.svg)](https://sourcegraph.com/github.com/pkg/errors?badge)
+
+Package errors provides simple error handling primitives.
+
+`go get github.com/pkg/errors`
+
+The traditional error handling idiom in Go is roughly akin to
+```go
+if err != nil {
+ return err
+}
+```
+which applied recursively up the call stack results in error reports without context or debugging information. The errors package allows programmers to add context to the failure path in their code in a way that does not destroy the original value of the error.
+
+## Adding context to an error
+
+The errors.Wrap function returns a new error that adds context to the original error. For example
+```go
+_, err := ioutil.ReadAll(r)
+if err != nil {
+ return errors.Wrap(err, "read failed")
+}
+```
+## Retrieving the cause of an error
+
+Using `errors.Wrap` constructs a stack of errors, adding context to the preceding error. Depending on the nature of the error it may be necessary to reverse the operation of errors.Wrap to retrieve the original error for inspection. Any error value which implements this interface can be inspected by `errors.Cause`.
+```go
+type causer interface {
+ Cause() error
+}
+```
+`errors.Cause` will recursively retrieve the topmost error which does not implement `causer`, which is assumed to be the original cause. For example:
+```go
+switch err := errors.Cause(err).(type) {
+case *MyError:
+ // handle specifically
+default:
+ // unknown error
+}
+```
+
+[Read the package documentation for more information](https://godoc.org/github.com/pkg/errors).
+
+## Roadmap
+
+With the upcoming [Go2 error proposals](https://go.googlesource.com/proposal/+/master/design/go2draft.md) this package is moving into maintenance mode. The roadmap for a 1.0 release is as follows:
+
+- 0.9. Remove pre Go 1.9 and Go 1.10 support, address outstanding pull requests (if possible)
+- 1.0. Final release.
+
+## Contributing
+
+Because of the Go2 errors changes, this package is not accepting proposals for new functionality. With that said, we welcome pull requests, bug fixes and issue reports.
+
+Before sending a PR, please discuss your change by raising an issue.
+
+## License
+
+BSD-2-Clause
diff --git a/vendor/github.com/pkg/errors/appveyor.yml b/vendor/github.com/pkg/errors/appveyor.yml
new file mode 100644
index 00000000..a932eade
--- /dev/null
+++ b/vendor/github.com/pkg/errors/appveyor.yml
@@ -0,0 +1,32 @@
+version: build-{build}.{branch}
+
+clone_folder: C:\gopath\src\github.com\pkg\errors
+shallow_clone: true # for startup speed
+
+environment:
+ GOPATH: C:\gopath
+
+platform:
+ - x64
+
+# http://www.appveyor.com/docs/installed-software
+install:
+ # some helpful output for debugging builds
+ - go version
+ - go env
+ # pre-installed MinGW at C:\MinGW is 32bit only
+ # but MSYS2 at C:\msys64 has mingw64
+ - set PATH=C:\msys64\mingw64\bin;%PATH%
+ - gcc --version
+ - g++ --version
+
+build_script:
+ - go install -v ./...
+
+test_script:
+ - set PATH=C:\gopath\bin;%PATH%
+ - go test -v ./...
+
+#artifacts:
+# - path: '%GOPATH%\bin\*.exe'
+deploy: off
diff --git a/vendor/github.com/pkg/errors/errors.go b/vendor/github.com/pkg/errors/errors.go
new file mode 100644
index 00000000..161aea25
--- /dev/null
+++ b/vendor/github.com/pkg/errors/errors.go
@@ -0,0 +1,288 @@
+// Package errors provides simple error handling primitives.
+//
+// The traditional error handling idiom in Go is roughly akin to
+//
+// if err != nil {
+// return err
+// }
+//
+// which when applied recursively up the call stack results in error reports
+// without context or debugging information. The errors package allows
+// programmers to add context to the failure path in their code in a way
+// that does not destroy the original value of the error.
+//
+// Adding context to an error
+//
+// The errors.Wrap function returns a new error that adds context to the
+// original error by recording a stack trace at the point Wrap is called,
+// together with the supplied message. For example
+//
+// _, err := ioutil.ReadAll(r)
+// if err != nil {
+// return errors.Wrap(err, "read failed")
+// }
+//
+// If additional control is required, the errors.WithStack and
+// errors.WithMessage functions destructure errors.Wrap into its component
+// operations: annotating an error with a stack trace and with a message,
+// respectively.
+//
+// Retrieving the cause of an error
+//
+// Using errors.Wrap constructs a stack of errors, adding context to the
+// preceding error. Depending on the nature of the error it may be necessary
+// to reverse the operation of errors.Wrap to retrieve the original error
+// for inspection. Any error value which implements this interface
+//
+// type causer interface {
+// Cause() error
+// }
+//
+// can be inspected by errors.Cause. errors.Cause will recursively retrieve
+// the topmost error that does not implement causer, which is assumed to be
+// the original cause. For example:
+//
+// switch err := errors.Cause(err).(type) {
+// case *MyError:
+// // handle specifically
+// default:
+// // unknown error
+// }
+//
+// Although the causer interface is not exported by this package, it is
+// considered a part of its stable public interface.
+//
+// Formatted printing of errors
+//
+// All error values returned from this package implement fmt.Formatter and can
+// be formatted by the fmt package. The following verbs are supported:
+//
+// %s print the error. If the error has a Cause it will be
+// printed recursively.
+// %v see %s
+// %+v extended format. Each Frame of the error's StackTrace will
+// be printed in detail.
+//
+// Retrieving the stack trace of an error or wrapper
+//
+// New, Errorf, Wrap, and Wrapf record a stack trace at the point they are
+// invoked. This information can be retrieved with the following interface:
+//
+// type stackTracer interface {
+// StackTrace() errors.StackTrace
+// }
+//
+// The returned errors.StackTrace type is defined as
+//
+// type StackTrace []Frame
+//
+// The Frame type represents a call site in the stack trace. Frame supports
+// the fmt.Formatter interface that can be used for printing information about
+// the stack trace of this error. For example:
+//
+// if err, ok := err.(stackTracer); ok {
+// for _, f := range err.StackTrace() {
+// fmt.Printf("%+s:%d\n", f, f)
+// }
+// }
+//
+// Although the stackTracer interface is not exported by this package, it is
+// considered a part of its stable public interface.
+//
+// See the documentation for Frame.Format for more details.
+package errors
+
+import (
+ "fmt"
+ "io"
+)
+
+// New returns an error with the supplied message.
+// New also records the stack trace at the point it was called.
+func New(message string) error {
+ return &fundamental{
+ msg: message,
+ stack: callers(),
+ }
+}
+
+// Errorf formats according to a format specifier and returns the string
+// as a value that satisfies error.
+// Errorf also records the stack trace at the point it was called.
+func Errorf(format string, args ...interface{}) error {
+ return &fundamental{
+ msg: fmt.Sprintf(format, args...),
+ stack: callers(),
+ }
+}
+
+// fundamental is an error that has a message and a stack, but no caller.
+type fundamental struct {
+ msg string
+ *stack
+}
+
+func (f *fundamental) Error() string { return f.msg }
+
+func (f *fundamental) Format(s fmt.State, verb rune) {
+ switch verb {
+ case 'v':
+ if s.Flag('+') {
+ io.WriteString(s, f.msg)
+ f.stack.Format(s, verb)
+ return
+ }
+ fallthrough
+ case 's':
+ io.WriteString(s, f.msg)
+ case 'q':
+ fmt.Fprintf(s, "%q", f.msg)
+ }
+}
+
+// WithStack annotates err with a stack trace at the point WithStack was called.
+// If err is nil, WithStack returns nil.
+func WithStack(err error) error {
+ if err == nil {
+ return nil
+ }
+ return &withStack{
+ err,
+ callers(),
+ }
+}
+
+type withStack struct {
+ error
+ *stack
+}
+
+func (w *withStack) Cause() error { return w.error }
+
+// Unwrap provides compatibility for Go 1.13 error chains.
+func (w *withStack) Unwrap() error { return w.error }
+
+func (w *withStack) Format(s fmt.State, verb rune) {
+ switch verb {
+ case 'v':
+ if s.Flag('+') {
+ fmt.Fprintf(s, "%+v", w.Cause())
+ w.stack.Format(s, verb)
+ return
+ }
+ fallthrough
+ case 's':
+ io.WriteString(s, w.Error())
+ case 'q':
+ fmt.Fprintf(s, "%q", w.Error())
+ }
+}
+
+// Wrap returns an error annotating err with a stack trace
+// at the point Wrap is called, and the supplied message.
+// If err is nil, Wrap returns nil.
+func Wrap(err error, message string) error {
+ if err == nil {
+ return nil
+ }
+ err = &withMessage{
+ cause: err,
+ msg: message,
+ }
+ return &withStack{
+ err,
+ callers(),
+ }
+}
+
+// Wrapf returns an error annotating err with a stack trace
+// at the point Wrapf is called, and the format specifier.
+// If err is nil, Wrapf returns nil.
+func Wrapf(err error, format string, args ...interface{}) error {
+ if err == nil {
+ return nil
+ }
+ err = &withMessage{
+ cause: err,
+ msg: fmt.Sprintf(format, args...),
+ }
+ return &withStack{
+ err,
+ callers(),
+ }
+}
+
+// WithMessage annotates err with a new message.
+// If err is nil, WithMessage returns nil.
+func WithMessage(err error, message string) error {
+ if err == nil {
+ return nil
+ }
+ return &withMessage{
+ cause: err,
+ msg: message,
+ }
+}
+
+// WithMessagef annotates err with the format specifier.
+// If err is nil, WithMessagef returns nil.
+func WithMessagef(err error, format string, args ...interface{}) error {
+ if err == nil {
+ return nil
+ }
+ return &withMessage{
+ cause: err,
+ msg: fmt.Sprintf(format, args...),
+ }
+}
+
+type withMessage struct {
+ cause error
+ msg string
+}
+
+func (w *withMessage) Error() string { return w.msg + ": " + w.cause.Error() }
+func (w *withMessage) Cause() error { return w.cause }
+
+// Unwrap provides compatibility for Go 1.13 error chains.
+func (w *withMessage) Unwrap() error { return w.cause }
+
+func (w *withMessage) Format(s fmt.State, verb rune) {
+ switch verb {
+ case 'v':
+ if s.Flag('+') {
+ fmt.Fprintf(s, "%+v\n", w.Cause())
+ io.WriteString(s, w.msg)
+ return
+ }
+ fallthrough
+ case 's', 'q':
+ io.WriteString(s, w.Error())
+ }
+}
+
+// Cause returns the underlying cause of the error, if possible.
+// An error value has a cause if it implements the following
+// interface:
+//
+// type causer interface {
+// Cause() error
+// }
+//
+// If the error does not implement Cause, the original error will
+// be returned. If the error is nil, nil will be returned without further
+// investigation.
+func Cause(err error) error {
+ type causer interface {
+ Cause() error
+ }
+
+ for err != nil {
+ cause, ok := err.(causer)
+ if !ok {
+ break
+ }
+ err = cause.Cause()
+ }
+ return err
+}
diff --git a/vendor/github.com/pkg/errors/go113.go b/vendor/github.com/pkg/errors/go113.go
new file mode 100644
index 00000000..be0d10d0
--- /dev/null
+++ b/vendor/github.com/pkg/errors/go113.go
@@ -0,0 +1,38 @@
+// +build go1.13
+
+package errors
+
+import (
+ stderrors "errors"
+)
+
+// Is reports whether any error in err's chain matches target.
+//
+// The chain consists of err itself followed by the sequence of errors obtained by
+// repeatedly calling Unwrap.
+//
+// An error is considered to match a target if it is equal to that target or if
+// it implements a method Is(error) bool such that Is(target) returns true.
+func Is(err, target error) bool { return stderrors.Is(err, target) }
+
+// As finds the first error in err's chain that matches target, and if so, sets
+// target to that error value and returns true.
+//
+// The chain consists of err itself followed by the sequence of errors obtained by
+// repeatedly calling Unwrap.
+//
+// An error matches target if the error's concrete value is assignable to the value
+// pointed to by target, or if the error has a method As(interface{}) bool such that
+// As(target) returns true. In the latter case, the As method is responsible for
+// setting target.
+//
+// As will panic if target is not a non-nil pointer to either a type that implements
+// error, or to any interface type. As returns false if err is nil.
+func As(err error, target interface{}) bool { return stderrors.As(err, target) }
+
+// Unwrap returns the result of calling the Unwrap method on err, if err's
+// type contains an Unwrap method returning error.
+// Otherwise, Unwrap returns nil.
+func Unwrap(err error) error {
+ return stderrors.Unwrap(err)
+}
diff --git a/vendor/github.com/pkg/errors/stack.go b/vendor/github.com/pkg/errors/stack.go
new file mode 100644
index 00000000..779a8348
--- /dev/null
+++ b/vendor/github.com/pkg/errors/stack.go
@@ -0,0 +1,177 @@
+package errors
+
+import (
+ "fmt"
+ "io"
+ "path"
+ "runtime"
+ "strconv"
+ "strings"
+)
+
+// Frame represents a program counter inside a stack frame.
+// For historical reasons if Frame is interpreted as a uintptr
+// its value represents the program counter + 1.
+type Frame uintptr
+
+// pc returns the program counter for this frame;
+// multiple frames may have the same PC value.
+func (f Frame) pc() uintptr { return uintptr(f) - 1 }
+
+// file returns the full path to the file that contains the
+// function for this Frame's pc.
+func (f Frame) file() string {
+ fn := runtime.FuncForPC(f.pc())
+ if fn == nil {
+ return "unknown"
+ }
+ file, _ := fn.FileLine(f.pc())
+ return file
+}
+
+// line returns the line number of source code of the
+// function for this Frame's pc.
+func (f Frame) line() int {
+ fn := runtime.FuncForPC(f.pc())
+ if fn == nil {
+ return 0
+ }
+ _, line := fn.FileLine(f.pc())
+ return line
+}
+
+// name returns the name of this function, if known.
+func (f Frame) name() string {
+ fn := runtime.FuncForPC(f.pc())
+ if fn == nil {
+ return "unknown"
+ }
+ return fn.Name()
+}
+
+// Format formats the frame according to the fmt.Formatter interface.
+//
+// %s source file
+// %d source line
+// %n function name
+// %v equivalent to %s:%d
+//
+// Format accepts flags that alter the printing of some verbs, as follows:
+//
+// %+s function name and path of source file relative to the compile time
+// GOPATH separated by \n\t (\n\t)
+// %+v equivalent to %+s:%d
+func (f Frame) Format(s fmt.State, verb rune) {
+ switch verb {
+ case 's':
+ switch {
+ case s.Flag('+'):
+ io.WriteString(s, f.name())
+ io.WriteString(s, "\n\t")
+ io.WriteString(s, f.file())
+ default:
+ io.WriteString(s, path.Base(f.file()))
+ }
+ case 'd':
+ io.WriteString(s, strconv.Itoa(f.line()))
+ case 'n':
+ io.WriteString(s, funcname(f.name()))
+ case 'v':
+ f.Format(s, 's')
+ io.WriteString(s, ":")
+ f.Format(s, 'd')
+ }
+}
+
+// MarshalText formats a stacktrace Frame as a text string. The output is the
+// same as that of fmt.Sprintf("%+v", f), but without newlines or tabs.
+func (f Frame) MarshalText() ([]byte, error) {
+ name := f.name()
+ if name == "unknown" {
+ return []byte(name), nil
+ }
+ return []byte(fmt.Sprintf("%s %s:%d", name, f.file(), f.line())), nil
+}
+
+// StackTrace is a stack of Frames from innermost (newest) to outermost (oldest).
+type StackTrace []Frame
+
+// Format formats the stack of Frames according to the fmt.Formatter interface.
+//
+// %s lists source files for each Frame in the stack
+// %v lists the source file and line number for each Frame in the stack
+//
+// Format accepts flags that alter the printing of some verbs, as follows:
+//
+// %+v Prints filename, function, and line number for each Frame in the stack.
+func (st StackTrace) Format(s fmt.State, verb rune) {
+ switch verb {
+ case 'v':
+ switch {
+ case s.Flag('+'):
+ for _, f := range st {
+ io.WriteString(s, "\n")
+ f.Format(s, verb)
+ }
+ case s.Flag('#'):
+ fmt.Fprintf(s, "%#v", []Frame(st))
+ default:
+ st.formatSlice(s, verb)
+ }
+ case 's':
+ st.formatSlice(s, verb)
+ }
+}
+
+// formatSlice will format this StackTrace into the given buffer as a slice of
+// Frame, only valid when called with '%s' or '%v'.
+func (st StackTrace) formatSlice(s fmt.State, verb rune) {
+ io.WriteString(s, "[")
+ for i, f := range st {
+ if i > 0 {
+ io.WriteString(s, " ")
+ }
+ f.Format(s, verb)
+ }
+ io.WriteString(s, "]")
+}
+
+// stack represents a stack of program counters.
+type stack []uintptr
+
+func (s *stack) Format(st fmt.State, verb rune) {
+ switch verb {
+ case 'v':
+ switch {
+ case st.Flag('+'):
+ for _, pc := range *s {
+ f := Frame(pc)
+ fmt.Fprintf(st, "\n%+v", f)
+ }
+ }
+ }
+}
+
+func (s *stack) StackTrace() StackTrace {
+ f := make([]Frame, len(*s))
+ for i := 0; i < len(f); i++ {
+ f[i] = Frame((*s)[i])
+ }
+ return f
+}
+
+func callers() *stack {
+ const depth = 32
+ var pcs [depth]uintptr
+ n := runtime.Callers(3, pcs[:])
+ var st stack = pcs[0:n]
+ return &st
+}
+
+// funcname removes the path prefix component of a function's name reported by func.Name().
+func funcname(name string) string {
+ i := strings.LastIndex(name, "/")
+ name = name[i+1:]
+ i = strings.Index(name, ".")
+ return name[i+1:]
+}
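
A hedged sketch of how a caller can reach the StackTrace carried by errors from this package; the stackTracer interface is defined locally by convention, since the package itself does not export it:

```go
package main

import (
	"fmt"

	"github.com/pkg/errors"
)

// stackTracer is the caller-side interface conventionally used to detect
// errors that carry a stack; it is an assumption of this sketch, not an
// exported type of the package.
type stackTracer interface {
	StackTrace() errors.StackTrace
}

func main() {
	err := errors.New("boom") // errors.New records the call stack

	if st, ok := err.(stackTracer); ok {
		// %+v on a StackTrace prints function, file and line per frame.
		fmt.Printf("%+v\n", st.StackTrace()[0:2])
	}
}
```
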
diff --git a/vendor/github.com/samuel/go-zookeeper/LICENSE b/vendor/github.com/samuel/go-zookeeper/LICENSE
new file mode 100644
index 00000000..bc00498c
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/LICENSE
@@ -0,0 +1,25 @@
+Copyright (c) 2013, Samuel Stauffer
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+* Neither the name of the author nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/samuel/go-zookeeper/zk/conn.go b/vendor/github.com/samuel/go-zookeeper/zk/conn.go
new file mode 100644
index 00000000..da9503a2
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/zk/conn.go
@@ -0,0 +1,1278 @@
+// Package zk is a native Go client library for the ZooKeeper orchestration service.
+package zk
+
+/*
+TODO:
+* make sure a ping response comes back in a reasonable time
+
+Possible watcher events:
+* Event{Type: EventNotWatching, State: StateDisconnected, Path: path, Err: err}
+*/
+
+import (
+ "crypto/rand"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// ErrNoServer indicates that an operation cannot be completed
+// because attempts to connect to all servers in the list failed.
+var ErrNoServer = errors.New("zk: could not connect to a server")
+
+// ErrInvalidPath indicates that an operation was being attempted on
+// an invalid path. (e.g. empty path)
+var ErrInvalidPath = errors.New("zk: invalid path")
+
+// DefaultLogger uses the stdlib log package for logging.
+var DefaultLogger Logger = defaultLogger{}
+
+const (
+ bufferSize = 1536 * 1024
+ eventChanSize = 6
+ sendChanSize = 16
+ protectedPrefix = "_c_"
+)
+
+type watchType int
+
+const (
+ watchTypeData = iota
+ watchTypeExist
+ watchTypeChild
+)
+
+type watchPathType struct {
+ path string
+ wType watchType
+}
+
+type Dialer func(network, address string, timeout time.Duration) (net.Conn, error)
+
+// Logger is an interface that can be implemented to provide custom log output.
+type Logger interface {
+ Printf(string, ...interface{})
+}
+
+type authCreds struct {
+ scheme string
+ auth []byte
+}
+
+type Conn struct {
+ lastZxid int64
+ sessionID int64
+ state State // must be 32-bit aligned
+ xid uint32
+ sessionTimeoutMs int32 // session timeout in milliseconds
+ passwd []byte
+
+ dialer Dialer
+ hostProvider HostProvider
+ serverMu sync.Mutex // protects server
+ server string // remember the address/port of the current server
+ conn net.Conn
+ eventChan chan Event
+ eventCallback EventCallback // may be nil
+ shouldQuit chan struct{}
+ pingInterval time.Duration
+ recvTimeout time.Duration
+ connectTimeout time.Duration
+ maxBufferSize int
+
+ creds []authCreds
+ credsMu sync.Mutex // protects creds
+
+ sendChan chan *request
+ requests map[int32]*request // Xid -> pending request
+ requestsLock sync.Mutex
+ watchers map[watchPathType][]chan Event
+ watchersLock sync.Mutex
+ closeChan chan struct{} // channel to tell the send loop to stop
+
+ // Debug (used by unit tests)
+ reconnectLatch chan struct{}
+ setWatchLimit int
+ setWatchCallback func([]*setWatchesRequest)
+ // Debug (for recurring re-auth hang)
+ debugCloseRecvLoop bool
+ debugReauthDone chan struct{}
+
+ logger Logger
+ logInfo bool // true if information messages are logged; false if only errors are logged
+
+ buf []byte
+}
+
+// connOption represents a connection option.
+type connOption func(c *Conn)
+
+type request struct {
+ xid int32
+ opcode int32
+ pkt interface{}
+ recvStruct interface{}
+ recvChan chan response
+
+ // Because sending and receiving happen in separate go routines, there's
+ // a possible race condition when creating watches from outside the read
+ // loop. We must ensure that a watcher gets added to the list synchronously
+ // with the response from the server on any request that creates a watch.
+ // In order to not hard-code the watch logic for each opcode in the recv
+ // loop, the caller can use recvFunc to insert some synchronous code
+ // after a response.
+ recvFunc func(*request, *responseHeader, error)
+}
+
+type response struct {
+ zxid int64
+ err error
+}
+
+type Event struct {
+ Type EventType
+ State State
+ Path string // For non-session events, the path of the watched node.
+ Err error
+ Server string // For connection events
+}
+
+// HostProvider is used to represent a set of hosts a ZooKeeper client should connect to.
+// It is an analog of the Java equivalent:
+// http://svn.apache.org/viewvc/zookeeper/trunk/src/java/main/org/apache/zookeeper/client/HostProvider.java?view=markup
+type HostProvider interface {
+ // Init is called first, with the servers specified in the connection string.
+ Init(servers []string) error
+ // Len returns the number of servers.
+ Len() int
+ // Next returns the next server to connect to. retryStart will be true if we've looped through
+ // all known servers without Connected() being called.
+ Next() (server string, retryStart bool)
+ // Notify the HostProvider of a successful connection.
+ Connected()
+}
+
+// ConnectWithDialer establishes a new connection to a pool of zookeeper servers
+// using a custom Dialer. See Connect for further information about session timeout.
+// This method is deprecated and provided for compatibility: use the WithDialer option instead.
+func ConnectWithDialer(servers []string, sessionTimeout time.Duration, dialer Dialer) (*Conn, <-chan Event, error) {
+ return Connect(servers, sessionTimeout, WithDialer(dialer))
+}
+
+// Connect establishes a new connection to a pool of zookeeper
+// servers. The provided session timeout sets the amount of time for which
+// a session is considered valid after losing connection to a server. Within
+// the session timeout it's possible to reestablish a connection to a different
+// server and keep the same session. This means any ephemeral nodes and
+// watches are maintained.
+func Connect(servers []string, sessionTimeout time.Duration, options ...connOption) (*Conn, <-chan Event, error) {
+ if len(servers) == 0 {
+ return nil, nil, errors.New("zk: server list must not be empty")
+ }
+
+ srvs := make([]string, len(servers))
+
+ for i, addr := range servers {
+ if strings.Contains(addr, ":") {
+ srvs[i] = addr
+ } else {
+ srvs[i] = addr + ":" + strconv.Itoa(DefaultPort)
+ }
+ }
+
+ // Randomize the order of the servers to avoid creating hotspots
+ stringShuffle(srvs)
+
+ ec := make(chan Event, eventChanSize)
+ conn := &Conn{
+ dialer: net.DialTimeout,
+ hostProvider: &DNSHostProvider{},
+ conn: nil,
+ state: StateDisconnected,
+ eventChan: ec,
+ shouldQuit: make(chan struct{}),
+ connectTimeout: 1 * time.Second,
+ sendChan: make(chan *request, sendChanSize),
+ requests: make(map[int32]*request),
+ watchers: make(map[watchPathType][]chan Event),
+ passwd: emptyPassword,
+ logger: DefaultLogger,
+ logInfo: true, // default is true for backwards compatibility
+ buf: make([]byte, bufferSize),
+ }
+
+ // Set provided options.
+ for _, option := range options {
+ option(conn)
+ }
+
+ if err := conn.hostProvider.Init(srvs); err != nil {
+ return nil, nil, err
+ }
+
+ conn.setTimeouts(int32(sessionTimeout / time.Millisecond))
+
+ go func() {
+ conn.loop()
+ conn.flushRequests(ErrClosing)
+ conn.invalidateWatches(ErrClosing)
+ close(conn.eventChan)
+ }()
+ return conn, ec, nil
+}
+
+// WithDialer returns a connection option specifying a non-default Dialer.
+func WithDialer(dialer Dialer) connOption {
+ return func(c *Conn) {
+ c.dialer = dialer
+ }
+}
+
+// WithHostProvider returns a connection option specifying a non-default HostProvider.
+func WithHostProvider(hostProvider HostProvider) connOption {
+ return func(c *Conn) {
+ c.hostProvider = hostProvider
+ }
+}
+
+// WithLogger returns a connection option specifying a non-default Logger
+func WithLogger(logger Logger) connOption {
+ return func(c *Conn) {
+ c.logger = logger
+ }
+}
+
+// WithLogInfo returns a connection option specifying whether or not information messages
+// should be logged.
+func WithLogInfo(logInfo bool) connOption {
+ return func(c *Conn) {
+ c.logInfo = logInfo
+ }
+}
+
+// EventCallback is a function that is called when an Event occurs.
+type EventCallback func(Event)
+
+// WithEventCallback returns a connection option that specifies an event
+// callback.
+// The callback must not block - doing so would delay the ZK go routines.
+func WithEventCallback(cb EventCallback) connOption {
+ return func(c *Conn) {
+ c.eventCallback = cb
+ }
+}
+
+// WithMaxBufferSize sets the maximum buffer size used to read and decode
+// packets received from the Zookeeper server. The standard Zookeeper client for
+// Java defaults to a limit of 1mb. For backwards compatibility, this Go client
+// defaults to unbounded unless overridden via this option. A value that is zero
+// or negative indicates that no limit is enforced.
+//
+// This is meant to prevent resource exhaustion in the face of potentially
+// malicious data in ZK. It should generally match the server setting (which
+// also defaults to 1mb) so that clients and servers agree on the limits for
+// things like the size of data in an individual znode and the total size of a
+// transaction.
+//
+// For production systems, this should be set to a reasonable value (ideally
+// that matches the server configuration). For ops tooling, it is handy to use a
+// much larger limit, in order to do things like clean-up problematic state in
+// the ZK tree. For example, if a single znode has a huge number of children, it
+// is possible for the response to a "list children" operation to exceed this
+// buffer size and cause errors in clients. The only way to subsequently clean
+// up the tree (by removing superfluous children) is to use a client configured
+// with a larger buffer size that can successfully query for all of the child
+// names and then remove them. (Note there are other tools that can list all of
+// the child names without an increased buffer size in the client, but they work
+// by inspecting the servers' transaction logs to enumerate children instead of
+// sending an online request to a server.)
+func WithMaxBufferSize(maxBufferSize int) connOption {
+ return func(c *Conn) {
+ c.maxBufferSize = maxBufferSize
+ }
+}
+
+// WithMaxConnBufferSize sets the maximum buffer size used to send and encode
+// packets to the Zookeeper server. The standard Zookeeper client for Java
+// defaults to a limit of 1mb. This option should be used for non-standard
+// server setups where a znode is bigger than the default 1mb.
+func WithMaxConnBufferSize(maxBufferSize int) connOption {
+ return func(c *Conn) {
+ c.buf = make([]byte, maxBufferSize)
+ }
+}
+
+func (c *Conn) Close() {
+ close(c.shouldQuit)
+
+ select {
+ case <-c.queueRequest(opClose, &closeRequest{}, &closeResponse{}, nil):
+ case <-time.After(time.Second):
+ }
+}
+
+// State returns the current state of the connection.
+func (c *Conn) State() State {
+ return State(atomic.LoadInt32((*int32)(&c.state)))
+}
+
+// SessionID returns the current session id of the connection.
+func (c *Conn) SessionID() int64 {
+ return atomic.LoadInt64(&c.sessionID)
+}
+
+// SetLogger sets the logger to be used for printing errors.
+// Logger is an interface provided by this package.
+func (c *Conn) SetLogger(l Logger) {
+ c.logger = l
+}
+
+func (c *Conn) setTimeouts(sessionTimeoutMs int32) {
+ c.sessionTimeoutMs = sessionTimeoutMs
+ sessionTimeout := time.Duration(sessionTimeoutMs) * time.Millisecond
+ c.recvTimeout = sessionTimeout * 2 / 3
+ c.pingInterval = c.recvTimeout / 2
+}
+
+func (c *Conn) setState(state State) {
+ atomic.StoreInt32((*int32)(&c.state), int32(state))
+ c.sendEvent(Event{Type: EventSession, State: state, Server: c.Server()})
+}
+
+func (c *Conn) sendEvent(evt Event) {
+ if c.eventCallback != nil {
+ c.eventCallback(evt)
+ }
+
+ select {
+ case c.eventChan <- evt:
+ default:
+ // panic("zk: event channel full - it must be monitored and never allowed to be full")
+ }
+}
+
+func (c *Conn) connect() error {
+ var retryStart bool
+ for {
+ c.serverMu.Lock()
+ c.server, retryStart = c.hostProvider.Next()
+ c.serverMu.Unlock()
+ c.setState(StateConnecting)
+ if retryStart {
+ c.flushUnsentRequests(ErrNoServer)
+ select {
+ case <-time.After(time.Second):
+ // pass
+ case <-c.shouldQuit:
+ c.setState(StateDisconnected)
+ c.flushUnsentRequests(ErrClosing)
+ return ErrClosing
+ }
+ }
+
+ zkConn, err := c.dialer("tcp", c.Server(), c.connectTimeout)
+ if err == nil {
+ c.conn = zkConn
+ c.setState(StateConnected)
+ if c.logInfo {
+ c.logger.Printf("Connected to %s", c.Server())
+ }
+ return nil
+ }
+
+ c.logger.Printf("Failed to connect to %s: %+v", c.Server(), err)
+ }
+}
+
+func (c *Conn) resendZkAuth(reauthReadyChan chan struct{}) {
+ shouldCancel := func() bool {
+ select {
+ case <-c.shouldQuit:
+ return true
+ case <-c.closeChan:
+ return true
+ default:
+ return false
+ }
+ }
+
+ c.credsMu.Lock()
+ defer c.credsMu.Unlock()
+
+ defer close(reauthReadyChan)
+
+ if c.logInfo {
+ c.logger.Printf("re-submitting `%d` credentials after reconnect", len(c.creds))
+ }
+
+ for _, cred := range c.creds {
+ if shouldCancel() {
+ return
+ }
+ resChan, err := c.sendRequest(
+ opSetAuth,
+ &setAuthRequest{Type: 0,
+ Scheme: cred.scheme,
+ Auth: cred.auth,
+ },
+ &setAuthResponse{},
+ nil)
+
+ if err != nil {
+ c.logger.Printf("call to sendRequest failed during credential resubmit: %s", err)
+ // FIXME(prozlach): let's ignore errors for now
+ continue
+ }
+
+ var res response
+ select {
+ case res = <-resChan:
+ case <-c.closeChan:
+ c.logger.Printf("recv closed, cancel re-submitting credentials")
+ return
+ case <-c.shouldQuit:
+ c.logger.Printf("should quit, cancel re-submitting credentials")
+ return
+ }
+ if res.err != nil {
+ c.logger.Printf("credential re-submit failed: %s", res.err)
+ // FIXME(prozlach): let's ignore errors for now
+ continue
+ }
+ }
+}
+
+func (c *Conn) sendRequest(
+ opcode int32,
+ req interface{},
+ res interface{},
+ recvFunc func(*request, *responseHeader, error),
+) (
+ <-chan response,
+ error,
+) {
+ rq := &request{
+ xid: c.nextXid(),
+ opcode: opcode,
+ pkt: req,
+ recvStruct: res,
+ recvChan: make(chan response, 1),
+ recvFunc: recvFunc,
+ }
+
+ if err := c.sendData(rq); err != nil {
+ return nil, err
+ }
+
+ return rq.recvChan, nil
+}
+
+func (c *Conn) loop() {
+ for {
+ if err := c.connect(); err != nil {
+ // c.Close() was called
+ return
+ }
+
+ err := c.authenticate()
+ switch {
+ case err == ErrSessionExpired:
+ c.logger.Printf("authentication failed: %s", err)
+ c.invalidateWatches(err)
+ case err != nil && c.conn != nil:
+ c.logger.Printf("authentication failed: %s", err)
+ c.conn.Close()
+ case err == nil:
+ if c.logInfo {
+ c.logger.Printf("authenticated: id=%d, timeout=%d", c.SessionID(), c.sessionTimeoutMs)
+ }
+ c.hostProvider.Connected() // mark success
+ c.closeChan = make(chan struct{}) // channel to tell the send loop to stop
+ reauthChan := make(chan struct{}) // channel to tell send loop that authdata has been resubmitted
+
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ <-reauthChan
+ if c.debugCloseRecvLoop {
+ close(c.debugReauthDone)
+ }
+ err := c.sendLoop()
+ if err != nil || c.logInfo {
+ c.logger.Printf("send loop terminated: err=%v", err)
+ }
+ c.conn.Close() // causes recv loop to EOF/exit
+ wg.Done()
+ }()
+
+ wg.Add(1)
+ go func() {
+ var err error
+ if c.debugCloseRecvLoop {
+ err = errors.New("DEBUG: close recv loop")
+ } else {
+ err = c.recvLoop(c.conn)
+ }
+ if err != io.EOF || c.logInfo {
+ c.logger.Printf("recv loop terminated: err=%v", err)
+ }
+ if err == nil {
+ panic("zk: recvLoop should never return nil error")
+ }
+ close(c.closeChan) // tell send loop to exit
+ wg.Done()
+ }()
+
+ c.resendZkAuth(reauthChan)
+
+ c.sendSetWatches()
+ wg.Wait()
+ }
+
+ c.setState(StateDisconnected)
+
+ select {
+ case <-c.shouldQuit:
+ c.flushRequests(ErrClosing)
+ return
+ default:
+ }
+
+ if err != ErrSessionExpired {
+ err = ErrConnectionClosed
+ }
+ c.flushRequests(err)
+
+ if c.reconnectLatch != nil {
+ select {
+ case <-c.shouldQuit:
+ return
+ case <-c.reconnectLatch:
+ }
+ }
+ }
+}
+
+func (c *Conn) flushUnsentRequests(err error) {
+ for {
+ select {
+ default:
+ return
+ case req := <-c.sendChan:
+ req.recvChan <- response{-1, err}
+ }
+ }
+}
+
+// Send error to all pending requests and clear request map
+func (c *Conn) flushRequests(err error) {
+ c.requestsLock.Lock()
+ for _, req := range c.requests {
+ req.recvChan <- response{-1, err}
+ }
+ c.requests = make(map[int32]*request)
+ c.requestsLock.Unlock()
+}
+
+// Send error to all watchers and clear watchers map
+func (c *Conn) invalidateWatches(err error) {
+ c.watchersLock.Lock()
+ defer c.watchersLock.Unlock()
+
+ if len(c.watchers) > 0 {
+ for pathType, watchers := range c.watchers {
+ ev := Event{Type: EventNotWatching, State: StateDisconnected, Path: pathType.path, Err: err}
+ for _, ch := range watchers {
+ ch <- ev
+ close(ch)
+ }
+ }
+ c.watchers = make(map[watchPathType][]chan Event)
+ }
+}
+
+func (c *Conn) sendSetWatches() {
+ c.watchersLock.Lock()
+ defer c.watchersLock.Unlock()
+
+ if len(c.watchers) == 0 {
+ return
+ }
+
+ // NB: A ZK server, by default, rejects packets >1mb. So, if we have too
+ // many watches to reset, we need to break this up into multiple packets
+ // to avoid hitting that limit. Mirroring the Java client behavior: we are
+ // conservative in that we limit requests to 128kb (since the server limit
+ // is actually configurable and could conceivably be configured smaller
+ // than the default of 1mb).
+ limit := 128 * 1024
+ if c.setWatchLimit > 0 {
+ limit = c.setWatchLimit
+ }
+
+ var reqs []*setWatchesRequest
+ var req *setWatchesRequest
+ var sizeSoFar int
+
+ n := 0
+ for pathType, watchers := range c.watchers {
+ if len(watchers) == 0 {
+ continue
+ }
+ addlLen := 4 + len(pathType.path)
+ if req == nil || sizeSoFar+addlLen > limit {
+ if req != nil {
+ // add to set of requests that we'll send
+ reqs = append(reqs, req)
+ }
+ sizeSoFar = 28 // fixed overhead of a set-watches packet
+ req = &setWatchesRequest{
+ RelativeZxid: c.lastZxid,
+ DataWatches: make([]string, 0),
+ ExistWatches: make([]string, 0),
+ ChildWatches: make([]string, 0),
+ }
+ }
+ sizeSoFar += addlLen
+ switch pathType.wType {
+ case watchTypeData:
+ req.DataWatches = append(req.DataWatches, pathType.path)
+ case watchTypeExist:
+ req.ExistWatches = append(req.ExistWatches, pathType.path)
+ case watchTypeChild:
+ req.ChildWatches = append(req.ChildWatches, pathType.path)
+ }
+ n++
+ }
+ if n == 0 {
+ return
+ }
+ if req != nil { // don't forget any trailing packet we were building
+ reqs = append(reqs, req)
+ }
+
+ if c.setWatchCallback != nil {
+ c.setWatchCallback(reqs)
+ }
+
+ go func() {
+ res := &setWatchesResponse{}
+ // TODO: Pipeline these so we queue all of them up before waiting on any
+ // response. That will require some investigation to make sure there
+ // aren't failure modes where a blocking write to the channel of requests
+ // could hang indefinitely and cause this goroutine to leak...
+ for _, req := range reqs {
+ _, err := c.request(opSetWatches, req, res, nil)
+ if err != nil {
+ c.logger.Printf("Failed to set previous watches: %s", err.Error())
+ break
+ }
+ }
+ }()
+}
+
+func (c *Conn) authenticate() error {
+ buf := make([]byte, 256)
+
+ // Encode and send a connect request.
+ n, err := encodePacket(buf[4:], &connectRequest{
+ ProtocolVersion: protocolVersion,
+ LastZxidSeen: c.lastZxid,
+ TimeOut: c.sessionTimeoutMs,
+ SessionID: c.SessionID(),
+ Passwd: c.passwd,
+ })
+ if err != nil {
+ return err
+ }
+
+ binary.BigEndian.PutUint32(buf[:4], uint32(n))
+
+ if err := c.conn.SetWriteDeadline(time.Now().Add(c.recvTimeout * 10)); err != nil {
+ return err
+ }
+ _, err = c.conn.Write(buf[:n+4])
+ if err != nil {
+ return err
+ }
+ if err := c.conn.SetWriteDeadline(time.Time{}); err != nil {
+ return err
+ }
+
+ // Receive and decode a connect response.
+ if err := c.conn.SetReadDeadline(time.Now().Add(c.recvTimeout * 10)); err != nil {
+ return err
+ }
+ _, err = io.ReadFull(c.conn, buf[:4])
+ if err != nil {
+ return err
+ }
+ if err := c.conn.SetReadDeadline(time.Time{}); err != nil {
+ return err
+ }
+
+ blen := int(binary.BigEndian.Uint32(buf[:4]))
+ if cap(buf) < blen {
+ buf = make([]byte, blen)
+ }
+
+ _, err = io.ReadFull(c.conn, buf[:blen])
+ if err != nil {
+ return err
+ }
+
+ r := connectResponse{}
+ _, err = decodePacket(buf[:blen], &r)
+ if err != nil {
+ return err
+ }
+ if r.SessionID == 0 {
+ atomic.StoreInt64(&c.sessionID, int64(0))
+ c.passwd = emptyPassword
+ c.lastZxid = 0
+ c.setState(StateExpired)
+ return ErrSessionExpired
+ }
+
+ atomic.StoreInt64(&c.sessionID, r.SessionID)
+ c.setTimeouts(r.TimeOut)
+ c.passwd = r.Passwd
+ c.setState(StateHasSession)
+
+ return nil
+}
+
+func (c *Conn) sendData(req *request) error {
+ header := &requestHeader{req.xid, req.opcode}
+ n, err := encodePacket(c.buf[4:], header)
+ if err != nil {
+ req.recvChan <- response{-1, err}
+ return nil
+ }
+
+ n2, err := encodePacket(c.buf[4+n:], req.pkt)
+ if err != nil {
+ req.recvChan <- response{-1, err}
+ return nil
+ }
+
+ n += n2
+
+ binary.BigEndian.PutUint32(c.buf[:4], uint32(n))
+
+ c.requestsLock.Lock()
+ select {
+ case <-c.closeChan:
+ req.recvChan <- response{-1, ErrConnectionClosed}
+ c.requestsLock.Unlock()
+ return ErrConnectionClosed
+ default:
+ }
+ c.requests[req.xid] = req
+ c.requestsLock.Unlock()
+
+ if err := c.conn.SetWriteDeadline(time.Now().Add(c.recvTimeout)); err != nil {
+ return err
+ }
+ _, err = c.conn.Write(c.buf[:n+4])
+ if err != nil {
+ req.recvChan <- response{-1, err}
+ c.conn.Close()
+ return err
+ }
+ if err := c.conn.SetWriteDeadline(time.Time{}); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (c *Conn) sendLoop() error {
+ pingTicker := time.NewTicker(c.pingInterval)
+ defer pingTicker.Stop()
+
+ for {
+ select {
+ case req := <-c.sendChan:
+ if err := c.sendData(req); err != nil {
+ return err
+ }
+ case <-pingTicker.C:
+ n, err := encodePacket(c.buf[4:], &requestHeader{Xid: -2, Opcode: opPing})
+ if err != nil {
+ panic("zk: opPing should never fail to serialize")
+ }
+
+ binary.BigEndian.PutUint32(c.buf[:4], uint32(n))
+
+ if err := c.conn.SetWriteDeadline(time.Now().Add(c.recvTimeout)); err != nil {
+ return err
+ }
+ _, err = c.conn.Write(c.buf[:n+4])
+ if err != nil {
+ c.conn.Close()
+ return err
+ }
+ if err := c.conn.SetWriteDeadline(time.Time{}); err != nil {
+ return err
+ }
+ case <-c.closeChan:
+ return nil
+ }
+ }
+}
+
+func (c *Conn) recvLoop(conn net.Conn) error {
+ sz := bufferSize
+ if c.maxBufferSize > 0 && sz > c.maxBufferSize {
+ sz = c.maxBufferSize
+ }
+ buf := make([]byte, sz)
+ for {
+ // packet length
+ if err := conn.SetReadDeadline(time.Now().Add(c.recvTimeout)); err != nil {
+ c.logger.Printf("failed to set connection deadline: %v", err)
+ }
+ _, err := io.ReadFull(conn, buf[:4])
+ if err != nil {
+ return fmt.Errorf("failed to read from connection: %v", err)
+ }
+
+ blen := int(binary.BigEndian.Uint32(buf[:4]))
+ if cap(buf) < blen {
+ if c.maxBufferSize > 0 && blen > c.maxBufferSize {
+ return fmt.Errorf("received packet from server with length %d, which exceeds max buffer size %d", blen, c.maxBufferSize)
+ }
+ buf = make([]byte, blen)
+ }
+
+ _, err = io.ReadFull(conn, buf[:blen])
+ if err != nil {
+ return err
+ }
+ if err := conn.SetReadDeadline(time.Time{}); err != nil {
+ return err
+ }
+
+ res := responseHeader{}
+ _, err = decodePacket(buf[:16], &res)
+ if err != nil {
+ return err
+ }
+
+ if res.Xid == -1 {
+ res := &watcherEvent{}
+ _, err := decodePacket(buf[16:blen], res)
+ if err != nil {
+ return err
+ }
+ ev := Event{
+ Type: res.Type,
+ State: res.State,
+ Path: res.Path,
+ Err: nil,
+ }
+ c.sendEvent(ev)
+ wTypes := make([]watchType, 0, 2)
+ switch res.Type {
+ case EventNodeCreated:
+ wTypes = append(wTypes, watchTypeExist)
+ case EventNodeDeleted, EventNodeDataChanged:
+ wTypes = append(wTypes, watchTypeExist, watchTypeData, watchTypeChild)
+ case EventNodeChildrenChanged:
+ wTypes = append(wTypes, watchTypeChild)
+ }
+ c.watchersLock.Lock()
+ for _, t := range wTypes {
+ wpt := watchPathType{res.Path, t}
+ if watchers, ok := c.watchers[wpt]; ok {
+ for _, ch := range watchers {
+ ch <- ev
+ close(ch)
+ }
+ delete(c.watchers, wpt)
+ }
+ }
+ c.watchersLock.Unlock()
+ } else if res.Xid == -2 {
+ // Ping response. Ignore.
+ } else if res.Xid < 0 {
+ c.logger.Printf("Xid < 0 (%d) but not ping or watcher event", res.Xid)
+ } else {
+ if res.Zxid > 0 {
+ c.lastZxid = res.Zxid
+ }
+
+ c.requestsLock.Lock()
+ req, ok := c.requests[res.Xid]
+ if ok {
+ delete(c.requests, res.Xid)
+ }
+ c.requestsLock.Unlock()
+
+ if !ok {
+ c.logger.Printf("Response for unknown request with xid %d", res.Xid)
+ } else {
+ if res.Err != 0 {
+ err = res.Err.toError()
+ } else {
+ _, err = decodePacket(buf[16:blen], req.recvStruct)
+ }
+ if req.recvFunc != nil {
+ req.recvFunc(req, &res, err)
+ }
+ req.recvChan <- response{res.Zxid, err}
+ if req.opcode == opClose {
+ return io.EOF
+ }
+ }
+ }
+ }
+}
+
+func (c *Conn) nextXid() int32 {
+ return int32(atomic.AddUint32(&c.xid, 1) & 0x7fffffff)
+}
+
+func (c *Conn) addWatcher(path string, watchType watchType) <-chan Event {
+ c.watchersLock.Lock()
+ defer c.watchersLock.Unlock()
+
+ ch := make(chan Event, 1)
+ wpt := watchPathType{path, watchType}
+ c.watchers[wpt] = append(c.watchers[wpt], ch)
+ return ch
+}
+
+func (c *Conn) queueRequest(opcode int32, req interface{}, res interface{}, recvFunc func(*request, *responseHeader, error)) <-chan response {
+ rq := &request{
+ xid: c.nextXid(),
+ opcode: opcode,
+ pkt: req,
+ recvStruct: res,
+ recvChan: make(chan response, 1),
+ recvFunc: recvFunc,
+ }
+ c.sendChan <- rq
+ return rq.recvChan
+}
+
+func (c *Conn) request(opcode int32, req interface{}, res interface{}, recvFunc func(*request, *responseHeader, error)) (int64, error) {
+ r := <-c.queueRequest(opcode, req, res, recvFunc)
+ return r.zxid, r.err
+}
+
+func (c *Conn) AddAuth(scheme string, auth []byte) error {
+ _, err := c.request(opSetAuth, &setAuthRequest{Type: 0, Scheme: scheme, Auth: auth}, &setAuthResponse{}, nil)
+
+ if err != nil {
+ return err
+ }
+
+ // Remember authdata so that it can be re-submitted on reconnect
+ //
+ // FIXME(prozlach): For now we treat "userfoo:passbar" and "userfoo:passbar2"
+ // as two different entries, which will be re-submitted on reconnect. Some
+ // research is needed on how ZK treats these cases and
+ // then maybe switch to something like "map[username] = password" to allow
+ // only a single password for a given user, with users being unique.
+ obj := authCreds{
+ scheme: scheme,
+ auth: auth,
+ }
+
+ c.credsMu.Lock()
+ c.creds = append(c.creds, obj)
+ c.credsMu.Unlock()
+
+ return nil
+}
+
+func (c *Conn) Children(path string) ([]string, *Stat, error) {
+ if err := validatePath(path, false); err != nil {
+ return nil, nil, err
+ }
+
+ res := &getChildren2Response{}
+ _, err := c.request(opGetChildren2, &getChildren2Request{Path: path, Watch: false}, res, nil)
+ return res.Children, &res.Stat, err
+}
+
+func (c *Conn) ChildrenW(path string) ([]string, *Stat, <-chan Event, error) {
+ if err := validatePath(path, false); err != nil {
+ return nil, nil, nil, err
+ }
+
+ var ech <-chan Event
+ res := &getChildren2Response{}
+ _, err := c.request(opGetChildren2, &getChildren2Request{Path: path, Watch: true}, res, func(req *request, res *responseHeader, err error) {
+ if err == nil {
+ ech = c.addWatcher(path, watchTypeChild)
+ }
+ })
+ if err != nil {
+ return nil, nil, nil, err
+ }
+ return res.Children, &res.Stat, ech, err
+}
+
+func (c *Conn) Get(path string) ([]byte, *Stat, error) {
+ if err := validatePath(path, false); err != nil {
+ return nil, nil, err
+ }
+
+ res := &getDataResponse{}
+ _, err := c.request(opGetData, &getDataRequest{Path: path, Watch: false}, res, nil)
+ return res.Data, &res.Stat, err
+}
+
+// GetW returns the contents of a znode and sets a watch
+func (c *Conn) GetW(path string) ([]byte, *Stat, <-chan Event, error) {
+ if err := validatePath(path, false); err != nil {
+ return nil, nil, nil, err
+ }
+
+ var ech <-chan Event
+ res := &getDataResponse{}
+ _, err := c.request(opGetData, &getDataRequest{Path: path, Watch: true}, res, func(req *request, res *responseHeader, err error) {
+ if err == nil {
+ ech = c.addWatcher(path, watchTypeData)
+ }
+ })
+ if err != nil {
+ return nil, nil, nil, err
+ }
+ return res.Data, &res.Stat, ech, err
+}
+
+func (c *Conn) Set(path string, data []byte, version int32) (*Stat, error) {
+ if err := validatePath(path, false); err != nil {
+ return nil, err
+ }
+
+ res := &setDataResponse{}
+ _, err := c.request(opSetData, &SetDataRequest{path, data, version}, res, nil)
+ return &res.Stat, err
+}
+
+func (c *Conn) Create(path string, data []byte, flags int32, acl []ACL) (string, error) {
+ if err := validatePath(path, flags&FlagSequence == FlagSequence); err != nil {
+ return "", err
+ }
+
+ res := &createResponse{}
+ _, err := c.request(opCreate, &CreateRequest{path, data, acl, flags}, res, nil)
+ return res.Path, err
+}
+
+// CreateProtectedEphemeralSequential fixes a race condition if the server crashes
+// after it creates the node. On reconnect the session may still be valid so the
+// ephemeral node still exists. Therefore, on reconnect we need to check if a node
+// with a GUID generated on create exists.
+func (c *Conn) CreateProtectedEphemeralSequential(path string, data []byte, acl []ACL) (string, error) {
+ if err := validatePath(path, true); err != nil {
+ return "", err
+ }
+
+ var guid [16]byte
+ _, err := io.ReadFull(rand.Reader, guid[:16])
+ if err != nil {
+ return "", err
+ }
+ guidStr := fmt.Sprintf("%x", guid)
+
+ parts := strings.Split(path, "/")
+ parts[len(parts)-1] = fmt.Sprintf("%s%s-%s", protectedPrefix, guidStr, parts[len(parts)-1])
+ rootPath := strings.Join(parts[:len(parts)-1], "/")
+ protectedPath := strings.Join(parts, "/")
+
+ var newPath string
+ for i := 0; i < 3; i++ {
+ newPath, err = c.Create(protectedPath, data, FlagEphemeral|FlagSequence, acl)
+ switch err {
+ case ErrSessionExpired:
+ // No need to search for the node since it can't exist. Just try again.
+ case ErrConnectionClosed:
+ children, _, err := c.Children(rootPath)
+ if err != nil {
+ return "", err
+ }
+ for _, p := range children {
+ parts := strings.Split(p, "/")
+ if pth := parts[len(parts)-1]; strings.HasPrefix(pth, protectedPrefix) {
+ if g := pth[len(protectedPrefix) : len(protectedPrefix)+32]; g == guidStr {
+ return rootPath + "/" + p, nil
+ }
+ }
+ }
+ case nil:
+ return newPath, nil
+ default:
+ return "", err
+ }
+ }
+ return "", err
+}
+
+func (c *Conn) Delete(path string, version int32) error {
+ if err := validatePath(path, false); err != nil {
+ return err
+ }
+
+ _, err := c.request(opDelete, &DeleteRequest{path, version}, &deleteResponse{}, nil)
+ return err
+}
+
+func (c *Conn) Exists(path string) (bool, *Stat, error) {
+ if err := validatePath(path, false); err != nil {
+ return false, nil, err
+ }
+
+ res := &existsResponse{}
+ _, err := c.request(opExists, &existsRequest{Path: path, Watch: false}, res, nil)
+ exists := true
+ if err == ErrNoNode {
+ exists = false
+ err = nil
+ }
+ return exists, &res.Stat, err
+}
+
+func (c *Conn) ExistsW(path string) (bool, *Stat, <-chan Event, error) {
+ if err := validatePath(path, false); err != nil {
+ return false, nil, nil, err
+ }
+
+ var ech <-chan Event
+ res := &existsResponse{}
+ _, err := c.request(opExists, &existsRequest{Path: path, Watch: true}, res, func(req *request, res *responseHeader, err error) {
+ if err == nil {
+ ech = c.addWatcher(path, watchTypeData)
+ } else if err == ErrNoNode {
+ ech = c.addWatcher(path, watchTypeExist)
+ }
+ })
+ exists := true
+ if err == ErrNoNode {
+ exists = false
+ err = nil
+ }
+ if err != nil {
+ return false, nil, nil, err
+ }
+ return exists, &res.Stat, ech, err
+}
+
+func (c *Conn) GetACL(path string) ([]ACL, *Stat, error) {
+ if err := validatePath(path, false); err != nil {
+ return nil, nil, err
+ }
+
+ res := &getAclResponse{}
+ _, err := c.request(opGetAcl, &getAclRequest{Path: path}, res, nil)
+ return res.Acl, &res.Stat, err
+}
+func (c *Conn) SetACL(path string, acl []ACL, version int32) (*Stat, error) {
+ if err := validatePath(path, false); err != nil {
+ return nil, err
+ }
+
+ res := &setAclResponse{}
+ _, err := c.request(opSetAcl, &setAclRequest{Path: path, Acl: acl, Version: version}, res, nil)
+ return &res.Stat, err
+}
+
+func (c *Conn) Sync(path string) (string, error) {
+ if err := validatePath(path, false); err != nil {
+ return "", err
+ }
+
+ res := &syncResponse{}
+ _, err := c.request(opSync, &syncRequest{Path: path}, res, nil)
+ return res.Path, err
+}
+
+type MultiResponse struct {
+ Stat *Stat
+ String string
+ Error error
+}
+
+// Multi executes multiple ZooKeeper operations or none of them. The provided
+// ops must be one of *CreateRequest, *DeleteRequest, *SetDataRequest, or
+// *CheckVersionRequest.
+func (c *Conn) Multi(ops ...interface{}) ([]MultiResponse, error) {
+ req := &multiRequest{
+ Ops: make([]multiRequestOp, 0, len(ops)),
+ DoneHeader: multiHeader{Type: -1, Done: true, Err: -1},
+ }
+ for _, op := range ops {
+ var opCode int32
+ switch op.(type) {
+ case *CreateRequest:
+ opCode = opCreate
+ case *SetDataRequest:
+ opCode = opSetData
+ case *DeleteRequest:
+ opCode = opDelete
+ case *CheckVersionRequest:
+ opCode = opCheck
+ default:
+ return nil, fmt.Errorf("unknown operation type %T", op)
+ }
+ req.Ops = append(req.Ops, multiRequestOp{multiHeader{opCode, false, -1}, op})
+ }
+ res := &multiResponse{}
+ _, err := c.request(opMulti, req, res, nil)
+ mr := make([]MultiResponse, len(res.Ops))
+ for i, op := range res.Ops {
+ mr[i] = MultiResponse{Stat: op.Stat, String: op.String, Error: op.Err.toError()}
+ }
+ return mr, err
+}
+
+// IncrementalReconfig is the zookeeper reconfiguration api that allows adding and removing servers
+// by lists of members.
+// It returns the new configuration stats.
+func (c *Conn) IncrementalReconfig(joining, leaving []string, version int64) (*Stat, error) {
+ // TODO: validate the shape of the member string to give early feedback.
+ request := &reconfigRequest{
+ JoiningServers: []byte(strings.Join(joining, ",")),
+ LeavingServers: []byte(strings.Join(leaving, ",")),
+ CurConfigId: version,
+ }
+
+ return c.internalReconfig(request)
+}
+
+// Reconfig is the non-incremental update functionality for Zookeeper where the list provided
+// is the entire new member list.
+// The optional version allows for conditional reconfigurations; -1 ignores the condition.
+func (c *Conn) Reconfig(members []string, version int64) (*Stat, error) {
+ request := &reconfigRequest{
+ NewMembers: []byte(strings.Join(members, ",")),
+ CurConfigId: version,
+ }
+
+ return c.internalReconfig(request)
+}
+
+func (c *Conn) internalReconfig(request *reconfigRequest) (*Stat, error) {
+ response := &reconfigReponse{}
+ _, err := c.request(opReconfig, request, response, nil)
+ return &response.Stat, err
+}
+
+// Server returns the current or last-connected server name.
+func (c *Conn) Server() string {
+ c.serverMu.Lock()
+ defer c.serverMu.Unlock()
+ return c.server
+}
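
As a usage sketch for the connection API above (the 127.0.0.1:2181 address and the /zookeeper path are illustrative assumptions; error handling is kept minimal):

```go
package main

import (
	"fmt"
	"time"

	"github.com/samuel/go-zookeeper/zk"
)

func main() {
	// The session timeout governs how long a session survives a lost connection.
	conn, events, err := zk.Connect([]string{"127.0.0.1:2181"}, 10*time.Second)
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	// Log session-level events (connect, disconnect, expiry) in the background.
	go func() {
		for ev := range events {
			fmt.Printf("session event: %v %v\n", ev.Type, ev.State)
		}
	}()

	data, stat, err := conn.Get("/zookeeper")
	if err != nil {
		panic(err)
	}
	fmt.Printf("got %d bytes, version %d\n", len(data), stat.Version)

	// ChildrenW also registers a one-shot child watch on the node.
	children, _, watch, err := conn.ChildrenW("/zookeeper")
	if err != nil {
		panic(err)
	}
	fmt.Println("children:", children)

	select {
	case ev := <-watch:
		fmt.Println("child watch fired:", ev.Path)
	case <-time.After(2 * time.Second):
		fmt.Println("no child change observed")
	}
}
```
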
diff --git a/vendor/github.com/samuel/go-zookeeper/zk/constants.go b/vendor/github.com/samuel/go-zookeeper/zk/constants.go
new file mode 100644
index 00000000..ccafcfc9
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/zk/constants.go
@@ -0,0 +1,249 @@
+package zk
+
+import (
+ "errors"
+ "fmt"
+)
+
+const (
+ protocolVersion = 0
+
+ DefaultPort = 2181
+)
+
+const (
+ opNotify = 0
+ opCreate = 1
+ opDelete = 2
+ opExists = 3
+ opGetData = 4
+ opSetData = 5
+ opGetAcl = 6
+ opSetAcl = 7
+ opGetChildren = 8
+ opSync = 9
+ opPing = 11
+ opGetChildren2 = 12
+ opCheck = 13
+ opMulti = 14
+ opReconfig = 16
+ opClose = -11
+ opSetAuth = 100
+ opSetWatches = 101
+ opError = -1
+ // Not in protocol, used internally
+ opWatcherEvent = -2
+)
+
+const (
+ EventNodeCreated EventType = 1
+ EventNodeDeleted EventType = 2
+ EventNodeDataChanged EventType = 3
+ EventNodeChildrenChanged EventType = 4
+
+ EventSession EventType = -1
+ EventNotWatching EventType = -2
+)
+
+var (
+ eventNames = map[EventType]string{
+ EventNodeCreated: "EventNodeCreated",
+ EventNodeDeleted: "EventNodeDeleted",
+ EventNodeDataChanged: "EventNodeDataChanged",
+ EventNodeChildrenChanged: "EventNodeChildrenChanged",
+ EventSession: "EventSession",
+ EventNotWatching: "EventNotWatching",
+ }
+)
+
+const (
+ StateUnknown State = -1
+ StateDisconnected State = 0
+ StateConnecting State = 1
+ StateAuthFailed State = 4
+ StateConnectedReadOnly State = 5
+ StateSaslAuthenticated State = 6
+ StateExpired State = -112
+
+ StateConnected = State(100)
+ StateHasSession = State(101)
+)
+
+const (
+ FlagEphemeral = 1
+ FlagSequence = 2
+)
+
+var (
+ stateNames = map[State]string{
+ StateUnknown: "StateUnknown",
+ StateDisconnected: "StateDisconnected",
+ StateConnectedReadOnly: "StateConnectedReadOnly",
+ StateSaslAuthenticated: "StateSaslAuthenticated",
+ StateExpired: "StateExpired",
+ StateAuthFailed: "StateAuthFailed",
+ StateConnecting: "StateConnecting",
+ StateConnected: "StateConnected",
+ StateHasSession: "StateHasSession",
+ }
+)
+
+type State int32
+
+func (s State) String() string {
+ if name := stateNames[s]; name != "" {
+ return name
+ }
+ return "unknown state"
+}
+
+type ErrCode int32
+
+var (
+ ErrConnectionClosed = errors.New("zk: connection closed")
+ ErrUnknown = errors.New("zk: unknown error")
+ ErrAPIError = errors.New("zk: api error")
+ ErrNoNode = errors.New("zk: node does not exist")
+ ErrNoAuth = errors.New("zk: not authenticated")
+ ErrBadVersion = errors.New("zk: version conflict")
+ ErrNoChildrenForEphemerals = errors.New("zk: ephemeral nodes may not have children")
+ ErrNodeExists = errors.New("zk: node already exists")
+ ErrNotEmpty = errors.New("zk: node has children")
+ ErrSessionExpired = errors.New("zk: session has been expired by the server")
+ ErrInvalidACL = errors.New("zk: invalid ACL specified")
+ ErrAuthFailed = errors.New("zk: client authentication failed")
+ ErrClosing = errors.New("zk: zookeeper is closing")
+ ErrNothing = errors.New("zk: no server responses to process")
+ ErrSessionMoved = errors.New("zk: session moved to another server, so operation is ignored")
+ ErrReconfigDisabled = errors.New("attempts to perform a reconfiguration operation when reconfiguration feature is disabled")
+ ErrBadArguments = errors.New("invalid arguments")
+ // ErrInvalidCallback = errors.New("zk: invalid callback specified")
+
+ errCodeToError = map[ErrCode]error{
+ 0: nil,
+ errAPIError: ErrAPIError,
+ errNoNode: ErrNoNode,
+ errNoAuth: ErrNoAuth,
+ errBadVersion: ErrBadVersion,
+ errNoChildrenForEphemerals: ErrNoChildrenForEphemerals,
+ errNodeExists: ErrNodeExists,
+ errNotEmpty: ErrNotEmpty,
+ errSessionExpired: ErrSessionExpired,
+ // errInvalidCallback: ErrInvalidCallback,
+ errInvalidAcl: ErrInvalidACL,
+ errAuthFailed: ErrAuthFailed,
+ errClosing: ErrClosing,
+ errNothing: ErrNothing,
+ errSessionMoved: ErrSessionMoved,
+ errZReconfigDisabled: ErrReconfigDisabled,
+ errBadArguments: ErrBadArguments,
+ }
+)
+
+func (e ErrCode) toError() error {
+ if err, ok := errCodeToError[e]; ok {
+ return err
+ }
+ return fmt.Errorf("unknown error: %v", e)
+}
+
+const (
+ errOk = 0
+ // System and server-side errors
+ errSystemError = -1
+ errRuntimeInconsistency = -2
+ errDataInconsistency = -3
+ errConnectionLoss = -4
+ errMarshallingError = -5
+ errUnimplemented = -6
+ errOperationTimeout = -7
+ errBadArguments = -8
+ errInvalidState = -9
+ // API errors
+ errAPIError ErrCode = -100
+ errNoNode ErrCode = -101 // *
+ errNoAuth ErrCode = -102
+ errBadVersion ErrCode = -103 // *
+ errNoChildrenForEphemerals ErrCode = -108
+ errNodeExists ErrCode = -110 // *
+ errNotEmpty ErrCode = -111
+ errSessionExpired ErrCode = -112
+ errInvalidCallback ErrCode = -113
+ errInvalidAcl ErrCode = -114
+ errAuthFailed ErrCode = -115
+ errClosing ErrCode = -116
+ errNothing ErrCode = -117
+ errSessionMoved ErrCode = -118
+ // Attempts to perform a reconfiguration operation when reconfiguration feature is disabled
+ errZReconfigDisabled ErrCode = -123
+)
+
+// Constants for ACL permissions
+const (
+ PermRead = 1 << iota
+ PermWrite
+ PermCreate
+ PermDelete
+ PermAdmin
+ PermAll = 0x1f
+)
+
+var (
+ emptyPassword = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+ opNames = map[int32]string{
+ opNotify: "notify",
+ opCreate: "create",
+ opDelete: "delete",
+ opExists: "exists",
+ opGetData: "getData",
+ opSetData: "setData",
+ opGetAcl: "getACL",
+ opSetAcl: "setACL",
+ opGetChildren: "getChildren",
+ opSync: "sync",
+ opPing: "ping",
+ opGetChildren2: "getChildren2",
+ opCheck: "check",
+ opMulti: "multi",
+ opReconfig: "reconfig",
+ opClose: "close",
+ opSetAuth: "setAuth",
+ opSetWatches: "setWatches",
+
+ opWatcherEvent: "watcherEvent",
+ }
+)
+
+type EventType int32
+
+func (t EventType) String() string {
+ if name := eventNames[t]; name != "" {
+ return name
+ }
+ return "Unknown"
+}
+
+// Mode is used to build custom server modes (leader|follower|standalone).
+type Mode uint8
+
+func (m Mode) String() string {
+ if name := modeNames[m]; name != "" {
+ return name
+ }
+ return "unknown"
+}
+
+const (
+ ModeUnknown Mode = iota
+ ModeLeader Mode = iota
+ ModeFollower Mode = iota
+ ModeStandalone Mode = iota
+)
+
+var (
+ modeNames = map[Mode]string{
+ ModeLeader: "leader",
+ ModeFollower: "follower",
+ ModeStandalone: "standalone",
+ }
+)
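
A small, hypothetical helper showing the usual pattern of comparing against the exported sentinel errors defined above; handleGet and the zkutil package name are assumptions for illustration:

```go
package zkutil

import (
	"fmt"

	"github.com/samuel/go-zookeeper/zk"
)

// handleGet reads a znode and reports the outcome using the package's
// sentinel errors rather than string matching.
func handleGet(conn *zk.Conn, path string) {
	data, _, err := conn.Get(path)
	switch err {
	case nil:
		fmt.Printf("%s = %q\n", path, data)
	case zk.ErrNoNode:
		fmt.Printf("%s does not exist\n", path)
	case zk.ErrSessionExpired, zk.ErrConnectionClosed:
		fmt.Printf("session problem while reading %s: %v\n", path, err)
	default:
		fmt.Printf("unexpected error reading %s: %v\n", path, err)
	}
}
```
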
diff --git a/vendor/github.com/samuel/go-zookeeper/zk/dnshostprovider.go b/vendor/github.com/samuel/go-zookeeper/zk/dnshostprovider.go
new file mode 100644
index 00000000..f4bba8d0
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/zk/dnshostprovider.go
@@ -0,0 +1,88 @@
+package zk
+
+import (
+ "fmt"
+ "net"
+ "sync"
+)
+
+// DNSHostProvider is the default HostProvider. It currently matches
+// the Java StaticHostProvider, resolving hosts from DNS once during
+// the call to Init. It could be easily extended to re-query DNS
+// periodically or if there is trouble connecting.
+type DNSHostProvider struct {
+ mu sync.Mutex // Protects everything, so we can add asynchronous updates later.
+ servers []string
+ curr int
+ last int
+ lookupHost func(string) ([]string, error) // Override of net.LookupHost, for testing.
+}
+
+// Init is called first, with the servers specified in the connection
+// string. It uses DNS to look up addresses for each server, then
+// shuffles them all together.
+func (hp *DNSHostProvider) Init(servers []string) error {
+ hp.mu.Lock()
+ defer hp.mu.Unlock()
+
+ lookupHost := hp.lookupHost
+ if lookupHost == nil {
+ lookupHost = net.LookupHost
+ }
+
+ found := []string{}
+ for _, server := range servers {
+ host, port, err := net.SplitHostPort(server)
+ if err != nil {
+ return err
+ }
+ addrs, err := lookupHost(host)
+ if err != nil {
+ return err
+ }
+ for _, addr := range addrs {
+ found = append(found, net.JoinHostPort(addr, port))
+ }
+ }
+
+ if len(found) == 0 {
+ return fmt.Errorf("No hosts found for addresses %q", servers)
+ }
+
+ // Randomize the order of the servers to avoid creating hotspots
+ stringShuffle(found)
+
+ hp.servers = found
+ hp.curr = -1
+ hp.last = -1
+
+ return nil
+}
+
+// Len returns the number of servers available
+func (hp *DNSHostProvider) Len() int {
+ hp.mu.Lock()
+ defer hp.mu.Unlock()
+ return len(hp.servers)
+}
+
+// Next returns the next server to connect to. retryStart will be true
+// if we've looped through all known servers without Connected() being
+// called.
+func (hp *DNSHostProvider) Next() (server string, retryStart bool) {
+ hp.mu.Lock()
+ defer hp.mu.Unlock()
+ hp.curr = (hp.curr + 1) % len(hp.servers)
+ retryStart = hp.curr == hp.last
+ if hp.last == -1 {
+ hp.last = 0
+ }
+ return hp.servers[hp.curr], retryStart
+}
+
+// Connected notifies the HostProvider of a successful connection.
+func (hp *DNSHostProvider) Connected() {
+ hp.mu.Lock()
+ defer hp.mu.Unlock()
+ hp.last = hp.curr
+}
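
A brief sketch of wiring a host provider through the WithHostProvider option; DNSHostProvider is already the default, so passing it explicitly here only illustrates the option plumbing, and the example hostnames are assumptions:

```go
package main

import (
	"time"

	"github.com/samuel/go-zookeeper/zk"
)

func main() {
	// DNSHostProvider resolves each host once at Init and then rotates
	// through the resolved addresses.
	hp := &zk.DNSHostProvider{}

	conn, _, err := zk.Connect(
		[]string{"zk1.example.com:2181", "zk2.example.com:2181"},
		5*time.Second,
		zk.WithHostProvider(hp),
	)
	if err != nil {
		panic(err)
	}
	defer conn.Close()
}
```
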
diff --git a/vendor/github.com/samuel/go-zookeeper/zk/flw.go b/vendor/github.com/samuel/go-zookeeper/zk/flw.go
new file mode 100644
index 00000000..1fb8b2ae
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/zk/flw.go
@@ -0,0 +1,270 @@
+package zk
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "net"
+ "regexp"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// FLWSrvr is a FourLetterWord helper function. In particular, this function pulls the srvr output
+// from the zookeeper instances and parses the output. A slice of *ServerStats structs is returned
+// as well as a boolean value to indicate whether this function processed successfully.
+//
+// If the boolean value is false there was a problem. If the *ServerStats slice is empty or nil,
+// then the error happened before we started to obtain 'srvr' values. Otherwise, one of the
+// servers had an issue and the "Error" value in the struct should be inspected to determine
+// which server had the issue.
+func FLWSrvr(servers []string, timeout time.Duration) ([]*ServerStats, bool) {
+ // different parts of the regular expression that are required to parse the srvr output
+ const (
+ zrVer = `^Zookeeper version: ([A-Za-z0-9\.\-]+), built on (\d\d/\d\d/\d\d\d\d \d\d:\d\d [A-Za-z0-9:\+\-]+)`
+ zrLat = `^Latency min/avg/max: (\d+)/(\d+)/(\d+)`
+ zrNet = `^Received: (\d+).*\n^Sent: (\d+).*\n^Connections: (\d+).*\n^Outstanding: (\d+)`
+ zrState = `^Zxid: (0x[A-Za-z0-9]+).*\n^Mode: (\w+).*\n^Node count: (\d+)`
+ )
+
+ // build the regex from the pieces above
+ re, err := regexp.Compile(fmt.Sprintf(`(?m:\A%v.*\n%v.*\n%v.*\n%v)`, zrVer, zrLat, zrNet, zrState))
+ if err != nil {
+ return nil, false
+ }
+
+ imOk := true
+ servers = FormatServers(servers)
+ ss := make([]*ServerStats, len(servers))
+
+ for i := range ss {
+ response, err := fourLetterWord(servers[i], "srvr", timeout)
+
+ if err != nil {
+ ss[i] = &ServerStats{Error: err}
+ imOk = false
+ continue
+ }
+
+ matches := re.FindAllStringSubmatch(string(response), -1)
+
+ if matches == nil {
+ err := fmt.Errorf("unable to parse fields from zookeeper response (no regex matches)")
+ ss[i] = &ServerStats{Error: err}
+ imOk = false
+ continue
+ }
+
+ match := matches[0][1:]
+
+ // determine the current server mode
+ var srvrMode Mode
+ switch match[10] {
+ case "leader":
+ srvrMode = ModeLeader
+ case "follower":
+ srvrMode = ModeFollower
+ case "standalone":
+ srvrMode = ModeStandalone
+ default:
+ srvrMode = ModeUnknown
+ }
+
+ buildTime, err := time.Parse("01/02/2006 15:04 MST", match[1])
+
+ if err != nil {
+ ss[i] = &ServerStats{Error: err}
+ imOk = false
+ continue
+ }
+
+ parsedInt, err := strconv.ParseInt(match[9], 0, 64)
+
+ if err != nil {
+ ss[i] = &ServerStats{Error: err}
+ imOk = false
+ continue
+ }
+
+ // the ZxID value is an int64 with two int32s packed inside
+ // the high int32 is the epoch (i.e., number of leader elections)
+ // the low int32 is the counter
+ epoch := int32(parsedInt >> 32)
+ counter := int32(parsedInt & 0xFFFFFFFF)
+
+ // within the regex above, these values must be numerical
+ // so we can avoid useless checking of the error return value
+ minLatency, _ := strconv.ParseInt(match[2], 0, 64)
+ avgLatency, _ := strconv.ParseInt(match[3], 0, 64)
+ maxLatency, _ := strconv.ParseInt(match[4], 0, 64)
+ recv, _ := strconv.ParseInt(match[5], 0, 64)
+ sent, _ := strconv.ParseInt(match[6], 0, 64)
+ cons, _ := strconv.ParseInt(match[7], 0, 64)
+ outs, _ := strconv.ParseInt(match[8], 0, 64)
+ ncnt, _ := strconv.ParseInt(match[11], 0, 64)
+
+ ss[i] = &ServerStats{
+ Sent: sent,
+ Received: recv,
+ NodeCount: ncnt,
+ MinLatency: minLatency,
+ AvgLatency: avgLatency,
+ MaxLatency: maxLatency,
+ Connections: cons,
+ Outstanding: outs,
+ Epoch: epoch,
+ Counter: counter,
+ BuildTime: buildTime,
+ Mode: srvrMode,
+ Version: match[0],
+ }
+ }
+
+ return ss, imOk
+}
+
+// FLWRuok is a FourLetterWord helper function. In particular, this function
+// pulls the ruok output from each server.
+func FLWRuok(servers []string, timeout time.Duration) []bool {
+ servers = FormatServers(servers)
+ oks := make([]bool, len(servers))
+
+ for i := range oks {
+ response, err := fourLetterWord(servers[i], "ruok", timeout)
+
+ if err != nil {
+ continue
+ }
+
+ if bytes.Equal(response[:4], []byte("imok")) {
+ oks[i] = true
+ }
+ }
+ return oks
+}
+
+// FLWCons is a FourLetterWord helper function. In particular, this function
+// pulls the cons output from each server.
+//
+// As with FLWSrvr, the boolean value indicates whether one of the requests had
+// an issue. The Clients struct has an Error value that can be checked.
+func FLWCons(servers []string, timeout time.Duration) ([]*ServerClients, bool) {
+ const (
+ zrAddr = `^ /((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?):(?:\d+))\[\d+\]`
+ zrPac = `\(queued=(\d+),recved=(\d+),sent=(\d+),sid=(0x[A-Za-z0-9]+),lop=(\w+),est=(\d+),to=(\d+),`
+ zrSesh = `lcxid=(0x[A-Za-z0-9]+),lzxid=(0x[A-Za-z0-9]+),lresp=(\d+),llat=(\d+),minlat=(\d+),avglat=(\d+),maxlat=(\d+)\)`
+ )
+
+ re, err := regexp.Compile(fmt.Sprintf("%v%v%v", zrAddr, zrPac, zrSesh))
+ if err != nil {
+ return nil, false
+ }
+
+ servers = FormatServers(servers)
+ sc := make([]*ServerClients, len(servers))
+ imOk := true
+
+ for i := range sc {
+ response, err := fourLetterWord(servers[i], "cons", timeout)
+
+ if err != nil {
+ sc[i] = &ServerClients{Error: err}
+ imOk = false
+ continue
+ }
+
+ scan := bufio.NewScanner(bytes.NewReader(response))
+
+ var clients []*ServerClient
+
+ for scan.Scan() {
+ line := scan.Bytes()
+
+ if len(line) == 0 {
+ continue
+ }
+
+ m := re.FindAllStringSubmatch(string(line), -1)
+
+ if m == nil {
+ err := fmt.Errorf("unable to parse fields from zookeeper response (no regex matches)")
+ sc[i] = &ServerClients{Error: err}
+ imOk = false
+ continue
+ }
+
+ match := m[0][1:]
+
+ queued, _ := strconv.ParseInt(match[1], 0, 64)
+ recvd, _ := strconv.ParseInt(match[2], 0, 64)
+ sent, _ := strconv.ParseInt(match[3], 0, 64)
+ sid, _ := strconv.ParseInt(match[4], 0, 64)
+ est, _ := strconv.ParseInt(match[6], 0, 64)
+ timeout, _ := strconv.ParseInt(match[7], 0, 32)
+ lcxid, _ := parseInt64(match[8])
+ lzxid, _ := parseInt64(match[9])
+ lresp, _ := strconv.ParseInt(match[10], 0, 64)
+ llat, _ := strconv.ParseInt(match[11], 0, 32)
+ minlat, _ := strconv.ParseInt(match[12], 0, 32)
+ avglat, _ := strconv.ParseInt(match[13], 0, 32)
+ maxlat, _ := strconv.ParseInt(match[14], 0, 32)
+
+ clients = append(clients, &ServerClient{
+ Queued: queued,
+ Received: recvd,
+ Sent: sent,
+ SessionID: sid,
+ Lcxid: int64(lcxid),
+ Lzxid: int64(lzxid),
+ Timeout: int32(timeout),
+ LastLatency: int32(llat),
+ MinLatency: int32(minlat),
+ AvgLatency: int32(avglat),
+ MaxLatency: int32(maxlat),
+ Established: time.Unix(est, 0),
+ LastResponse: time.Unix(lresp, 0),
+ Addr: match[0],
+ LastOperation: match[5],
+ })
+ }
+
+ sc[i] = &ServerClients{Clients: clients}
+ }
+
+ return sc, imOk
+}
+
+// parseInt64 is similar to strconv.ParseInt, but it also handles hex values that represent negative numbers
+func parseInt64(s string) (int64, error) {
+ if strings.HasPrefix(s, "0x") {
+ i, err := strconv.ParseUint(s, 0, 64)
+ return int64(i), err
+ }
+ return strconv.ParseInt(s, 0, 64)
+}
+
+func fourLetterWord(server, command string, timeout time.Duration) ([]byte, error) {
+ conn, err := net.DialTimeout("tcp", server, timeout)
+ if err != nil {
+ return nil, err
+ }
+
+ // the zookeeper server should automatically close this socket
+ // once the command has been processed, but better safe than sorry
+ defer conn.Close()
+
+ if err := conn.SetWriteDeadline(time.Now().Add(timeout)); err != nil {
+ return nil, err
+ }
+ _, err = conn.Write([]byte(command))
+ if err != nil {
+ return nil, err
+ }
+
+ if err := conn.SetReadDeadline(time.Now().Add(timeout)); err != nil {
+ return nil, err
+ }
+ return ioutil.ReadAll(conn)
+}
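
A usage sketch for the four-letter-word helpers above, assuming a single ZooKeeper server on 127.0.0.1:2181 (the address and timeouts are illustrative):

```go
package main

import (
	"fmt"
	"time"

	"github.com/samuel/go-zookeeper/zk"
)

func main() {
	servers := []string{"127.0.0.1:2181"}

	// FLWRuok sends "ruok" to each server and reports which answered "imok".
	for i, ok := range zk.FLWRuok(servers, 2*time.Second) {
		fmt.Printf("%s ruok: %v\n", servers[i], ok)
	}

	// FLWSrvr parses the "srvr" output into per-server stats.
	stats, allOK := zk.FLWSrvr(servers, 2*time.Second)
	if !allOK {
		fmt.Println("at least one server could not be queried")
	}
	for i, s := range stats {
		if s.Error != nil {
			fmt.Printf("%s: %v\n", servers[i], s.Error)
			continue
		}
		fmt.Printf("%s: mode=%v nodes=%d avg-latency=%d\n",
			servers[i], s.Mode, s.NodeCount, s.AvgLatency)
	}
}
```
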
diff --git a/vendor/github.com/samuel/go-zookeeper/zk/lock.go b/vendor/github.com/samuel/go-zookeeper/zk/lock.go
new file mode 100644
index 00000000..3c35a427
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/zk/lock.go
@@ -0,0 +1,150 @@
+package zk
+
+import (
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+var (
+ // ErrDeadlock is returned by Lock when trying to lock twice without unlocking first
+ ErrDeadlock = errors.New("zk: trying to acquire a lock twice")
+ // ErrNotLocked is returned by Unlock when trying to release a lock that has not first been acquired.
+ ErrNotLocked = errors.New("zk: not locked")
+)
+
+// Lock is a mutual exclusion lock.
+type Lock struct {
+ c *Conn
+ path string
+ acl []ACL
+ lockPath string
+ seq int
+}
+
+// NewLock creates a new lock instance using the provided connection, path, and acl.
+// The path must be a node that is only used by this lock. A lock instance starts
+// unlocked until Lock() is called.
+func NewLock(c *Conn, path string, acl []ACL) *Lock {
+ return &Lock{
+ c: c,
+ path: path,
+ acl: acl,
+ }
+}
+
+func parseSeq(path string) (int, error) {
+ parts := strings.Split(path, "-")
+ return strconv.Atoi(parts[len(parts)-1])
+}
+
+// Lock attempts to acquire the lock. It will wait to return until the lock
+// is acquired or an error occurs. If this instance already has the lock
+// then ErrDeadlock is returned.
+func (l *Lock) Lock() error {
+ if l.lockPath != "" {
+ return ErrDeadlock
+ }
+
+ prefix := fmt.Sprintf("%s/lock-", l.path)
+
+ path := ""
+ var err error
+ for i := 0; i < 3; i++ {
+ path, err = l.c.CreateProtectedEphemeralSequential(prefix, []byte{}, l.acl)
+ if err == ErrNoNode {
+ // Create parent node.
+ parts := strings.Split(l.path, "/")
+ pth := ""
+ for _, p := range parts[1:] {
+ var exists bool
+ pth += "/" + p
+ exists, _, err = l.c.Exists(pth)
+ if err != nil {
+ return err
+ }
+ if exists {
+ continue
+ }
+ _, err = l.c.Create(pth, []byte{}, 0, l.acl)
+ if err != nil && err != ErrNodeExists {
+ return err
+ }
+ }
+ } else if err == nil {
+ break
+ } else {
+ return err
+ }
+ }
+ if err != nil {
+ return err
+ }
+
+ seq, err := parseSeq(path)
+ if err != nil {
+ return err
+ }
+
+ for {
+ children, _, err := l.c.Children(l.path)
+ if err != nil {
+ return err
+ }
+
+ lowestSeq := seq
+ prevSeq := -1
+ prevSeqPath := ""
+ for _, p := range children {
+ s, err := parseSeq(p)
+ if err != nil {
+ return err
+ }
+ if s < lowestSeq {
+ lowestSeq = s
+ }
+ if s < seq && s > prevSeq {
+ prevSeq = s
+ prevSeqPath = p
+ }
+ }
+
+ if seq == lowestSeq {
+ // Acquired the lock
+ break
+ }
+
+ // Wait on the node next in line for the lock
+ _, _, ch, err := l.c.GetW(l.path + "/" + prevSeqPath)
+ if err != nil && err != ErrNoNode {
+ return err
+ } else if err != nil && err == ErrNoNode {
+ // try again
+ continue
+ }
+
+ ev := <-ch
+ if ev.Err != nil {
+ return ev.Err
+ }
+ }
+
+ l.seq = seq
+ l.lockPath = path
+ return nil
+}
+
+// Unlock releases an acquired lock. If the lock is not currently acquired by
+// this Lock instance, ErrNotLocked is returned.
+func (l *Lock) Unlock() error {
+ if l.lockPath == "" {
+ return ErrNotLocked
+ }
+ if err := l.c.Delete(l.lockPath, -1); err != nil {
+ return err
+ }
+ l.lockPath = ""
+ l.seq = 0
+ return nil
+}
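A minimal sketch of this lock recipe in use; it assumes the zk package's Connect helper and PermAll constant, which are not part of the lines above:

package main

import (
	"log"
	"time"

	"github.com/samuel/go-zookeeper/zk"
)

func main() {
	conn, _, err := zk.Connect([]string{"127.0.0.1:2181"}, 5*time.Second)
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Each Lock() call creates a protected ephemeral-sequential child under /my-lock
	// and blocks until that child holds the lowest sequence number.
	lock := zk.NewLock(conn, "/my-lock", zk.WorldACL(zk.PermAll))
	if err := lock.Lock(); err != nil {
		log.Fatal(err)
	}
	defer lock.Unlock()

	// ... critical section ...
}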
diff --git a/vendor/github.com/samuel/go-zookeeper/zk/structs.go b/vendor/github.com/samuel/go-zookeeper/zk/structs.go
new file mode 100644
index 00000000..9400c3c0
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/zk/structs.go
@@ -0,0 +1,624 @@
+package zk
+
+import (
+ "encoding/binary"
+ "errors"
+ "log"
+ "reflect"
+ "runtime"
+ "strings"
+ "time"
+)
+
+var (
+ ErrUnhandledFieldType = errors.New("zk: unhandled field type")
+ ErrPtrExpected = errors.New("zk: encode/decode expect a non-nil pointer to struct")
+ ErrShortBuffer = errors.New("zk: buffer too small")
+)
+
+type defaultLogger struct{}
+
+func (defaultLogger) Printf(format string, a ...interface{}) {
+ log.Printf(format, a...)
+}
+
+type ACL struct {
+ Perms int32
+ Scheme string
+ ID string
+}
+
+type Stat struct {
+ Czxid int64 // The zxid of the change that caused this znode to be created.
+ Mzxid int64 // The zxid of the change that last modified this znode.
+ Ctime int64 // The time in milliseconds from epoch when this znode was created.
+ Mtime int64 // The time in milliseconds from epoch when this znode was last modified.
+ Version int32 // The number of changes to the data of this znode.
+ Cversion int32 // The number of changes to the children of this znode.
+ Aversion int32 // The number of changes to the ACL of this znode.
+ EphemeralOwner int64 // The session id of the owner of this znode if the znode is an ephemeral node. If it is not an ephemeral node, it will be zero.
+ DataLength int32 // The length of the data field of this znode.
+ NumChildren int32 // The number of children of this znode.
+ Pzxid int64 // last modified children
+}
+
+// ServerClient is the information for a single Zookeeper client and its session.
+// This is used to parse/extract the output of the `cons` command.
+type ServerClient struct {
+ Queued int64
+ Received int64
+ Sent int64
+ SessionID int64
+ Lcxid int64
+ Lzxid int64
+ Timeout int32
+ LastLatency int32
+ MinLatency int32
+ AvgLatency int32
+ MaxLatency int32
+ Established time.Time
+ LastResponse time.Time
+ Addr string
+ LastOperation string // maybe?
+ Error error
+}
+
+// ServerClients is a struct for the FLWCons() function. It's used to provide
+// the list of Clients.
+//
+// This is needed because FLWCons() takes multiple servers.
+type ServerClients struct {
+ Clients []*ServerClient
+ Error error
+}
+
+// ServerStats is the information pulled from the Zookeeper `stat` command.
+type ServerStats struct {
+ Sent int64
+ Received int64
+ NodeCount int64
+ MinLatency int64
+ AvgLatency int64
+ MaxLatency int64
+ Connections int64
+ Outstanding int64
+ Epoch int32
+ Counter int32
+ BuildTime time.Time
+ Mode Mode
+ Version string
+ Error error
+}
+
+type requestHeader struct {
+ Xid int32
+ Opcode int32
+}
+
+type responseHeader struct {
+ Xid int32
+ Zxid int64
+ Err ErrCode
+}
+
+type multiHeader struct {
+ Type int32
+ Done bool
+ Err ErrCode
+}
+
+type auth struct {
+ Type int32
+ Scheme string
+ Auth []byte
+}
+
+// Generic request structs
+
+type pathRequest struct {
+ Path string
+}
+
+type PathVersionRequest struct {
+ Path string
+ Version int32
+}
+
+type pathWatchRequest struct {
+ Path string
+ Watch bool
+}
+
+type pathResponse struct {
+ Path string
+}
+
+type statResponse struct {
+ Stat Stat
+}
+
+//
+
+type CheckVersionRequest PathVersionRequest
+type closeRequest struct{}
+type closeResponse struct{}
+
+type connectRequest struct {
+ ProtocolVersion int32
+ LastZxidSeen int64
+ TimeOut int32
+ SessionID int64
+ Passwd []byte
+}
+
+type connectResponse struct {
+ ProtocolVersion int32
+ TimeOut int32
+ SessionID int64
+ Passwd []byte
+}
+
+type CreateRequest struct {
+ Path string
+ Data []byte
+ Acl []ACL
+ Flags int32
+}
+
+type createResponse pathResponse
+type DeleteRequest PathVersionRequest
+type deleteResponse struct{}
+
+type errorResponse struct {
+ Err int32
+}
+
+type existsRequest pathWatchRequest
+type existsResponse statResponse
+type getAclRequest pathRequest
+
+type getAclResponse struct {
+ Acl []ACL
+ Stat Stat
+}
+
+type getChildrenRequest pathRequest
+
+type getChildrenResponse struct {
+ Children []string
+}
+
+type getChildren2Request pathWatchRequest
+
+type getChildren2Response struct {
+ Children []string
+ Stat Stat
+}
+
+type getDataRequest pathWatchRequest
+
+type getDataResponse struct {
+ Data []byte
+ Stat Stat
+}
+
+type getMaxChildrenRequest pathRequest
+
+type getMaxChildrenResponse struct {
+ Max int32
+}
+
+type getSaslRequest struct {
+ Token []byte
+}
+
+type pingRequest struct{}
+type pingResponse struct{}
+
+type setAclRequest struct {
+ Path string
+ Acl []ACL
+ Version int32
+}
+
+type setAclResponse statResponse
+
+type SetDataRequest struct {
+ Path string
+ Data []byte
+ Version int32
+}
+
+type setDataResponse statResponse
+
+type setMaxChildren struct {
+ Path string
+ Max int32
+}
+
+type setSaslRequest struct {
+ Token string
+}
+
+type setSaslResponse struct {
+ Token string
+}
+
+type setWatchesRequest struct {
+ RelativeZxid int64
+ DataWatches []string
+ ExistWatches []string
+ ChildWatches []string
+}
+
+type setWatchesResponse struct{}
+
+type syncRequest pathRequest
+type syncResponse pathResponse
+
+type setAuthRequest auth
+type setAuthResponse struct{}
+
+type multiRequestOp struct {
+ Header multiHeader
+ Op interface{}
+}
+type multiRequest struct {
+ Ops []multiRequestOp
+ DoneHeader multiHeader
+}
+type multiResponseOp struct {
+ Header multiHeader
+ String string
+ Stat *Stat
+ Err ErrCode
+}
+type multiResponse struct {
+ Ops []multiResponseOp
+ DoneHeader multiHeader
+}
+
+// zk version 3.5 reconfig API
+type reconfigRequest struct {
+ JoiningServers []byte
+ LeavingServers []byte
+ NewMembers []byte
+ // curConfigId version of the current configuration
+ // optional - causes reconfiguration to return an error if configuration is no longer current
+ CurConfigId int64
+}
+
+type reconfigReponse getDataResponse
+
+func (r *multiRequest) Encode(buf []byte) (int, error) {
+ total := 0
+ for _, op := range r.Ops {
+ op.Header.Done = false
+ n, err := encodePacketValue(buf[total:], reflect.ValueOf(op))
+ if err != nil {
+ return total, err
+ }
+ total += n
+ }
+ r.DoneHeader.Done = true
+ n, err := encodePacketValue(buf[total:], reflect.ValueOf(r.DoneHeader))
+ if err != nil {
+ return total, err
+ }
+ total += n
+
+ return total, nil
+}
+
+func (r *multiRequest) Decode(buf []byte) (int, error) {
+ r.Ops = make([]multiRequestOp, 0)
+ r.DoneHeader = multiHeader{-1, true, -1}
+ total := 0
+ for {
+ header := &multiHeader{}
+ n, err := decodePacketValue(buf[total:], reflect.ValueOf(header))
+ if err != nil {
+ return total, err
+ }
+ total += n
+ if header.Done {
+ r.DoneHeader = *header
+ break
+ }
+
+ req := requestStructForOp(header.Type)
+ if req == nil {
+ return total, ErrAPIError
+ }
+ n, err = decodePacketValue(buf[total:], reflect.ValueOf(req))
+ if err != nil {
+ return total, err
+ }
+ total += n
+ r.Ops = append(r.Ops, multiRequestOp{*header, req})
+ }
+ return total, nil
+}
+
+func (r *multiResponse) Decode(buf []byte) (int, error) {
+ var multiErr error
+
+ r.Ops = make([]multiResponseOp, 0)
+ r.DoneHeader = multiHeader{-1, true, -1}
+ total := 0
+ for {
+ header := &multiHeader{}
+ n, err := decodePacketValue(buf[total:], reflect.ValueOf(header))
+ if err != nil {
+ return total, err
+ }
+ total += n
+ if header.Done {
+ r.DoneHeader = *header
+ break
+ }
+
+ res := multiResponseOp{Header: *header}
+ var w reflect.Value
+ switch header.Type {
+ default:
+ return total, ErrAPIError
+ case opError:
+ w = reflect.ValueOf(&res.Err)
+ case opCreate:
+ w = reflect.ValueOf(&res.String)
+ case opSetData:
+ res.Stat = new(Stat)
+ w = reflect.ValueOf(res.Stat)
+ case opCheck, opDelete:
+ }
+ if w.IsValid() {
+ n, err := decodePacketValue(buf[total:], w)
+ if err != nil {
+ return total, err
+ }
+ total += n
+ }
+ r.Ops = append(r.Ops, res)
+ if multiErr == nil && res.Err != errOk {
+ // Use the first error as the error returned from Multi().
+ multiErr = res.Err.toError()
+ }
+ }
+ return total, multiErr
+}
+
+type watcherEvent struct {
+ Type EventType
+ State State
+ Path string
+}
+
+type decoder interface {
+ Decode(buf []byte) (int, error)
+}
+
+type encoder interface {
+ Encode(buf []byte) (int, error)
+}
+
+func decodePacket(buf []byte, st interface{}) (n int, err error) {
+ defer func() {
+ if r := recover(); r != nil {
+ if e, ok := r.(runtime.Error); ok && strings.HasPrefix(e.Error(), "runtime error: slice bounds out of range") {
+ err = ErrShortBuffer
+ } else {
+ panic(r)
+ }
+ }
+ }()
+
+ v := reflect.ValueOf(st)
+ if v.Kind() != reflect.Ptr || v.IsNil() {
+ return 0, ErrPtrExpected
+ }
+ return decodePacketValue(buf, v)
+}
+
+func decodePacketValue(buf []byte, v reflect.Value) (int, error) {
+ rv := v
+ kind := v.Kind()
+ if kind == reflect.Ptr {
+ if v.IsNil() {
+ v.Set(reflect.New(v.Type().Elem()))
+ }
+ v = v.Elem()
+ kind = v.Kind()
+ }
+
+ n := 0
+ switch kind {
+ default:
+ return n, ErrUnhandledFieldType
+ case reflect.Struct:
+ if de, ok := rv.Interface().(decoder); ok {
+ return de.Decode(buf)
+ } else if de, ok := v.Interface().(decoder); ok {
+ return de.Decode(buf)
+ } else {
+ for i := 0; i < v.NumField(); i++ {
+ field := v.Field(i)
+ n2, err := decodePacketValue(buf[n:], field)
+ n += n2
+ if err != nil {
+ return n, err
+ }
+ }
+ }
+ case reflect.Bool:
+ v.SetBool(buf[n] != 0)
+ n++
+ case reflect.Int32:
+ v.SetInt(int64(binary.BigEndian.Uint32(buf[n : n+4])))
+ n += 4
+ case reflect.Int64:
+ v.SetInt(int64(binary.BigEndian.Uint64(buf[n : n+8])))
+ n += 8
+ case reflect.String:
+ ln := int(binary.BigEndian.Uint32(buf[n : n+4]))
+ v.SetString(string(buf[n+4 : n+4+ln]))
+ n += 4 + ln
+ case reflect.Slice:
+ switch v.Type().Elem().Kind() {
+ default:
+ count := int(binary.BigEndian.Uint32(buf[n : n+4]))
+ n += 4
+ values := reflect.MakeSlice(v.Type(), count, count)
+ v.Set(values)
+ for i := 0; i < count; i++ {
+ n2, err := decodePacketValue(buf[n:], values.Index(i))
+ n += n2
+ if err != nil {
+ return n, err
+ }
+ }
+ case reflect.Uint8:
+ ln := int(int32(binary.BigEndian.Uint32(buf[n : n+4])))
+ if ln < 0 {
+ n += 4
+ v.SetBytes(nil)
+ } else {
+ bytes := make([]byte, ln)
+ copy(bytes, buf[n+4:n+4+ln])
+ v.SetBytes(bytes)
+ n += 4 + ln
+ }
+ }
+ }
+ return n, nil
+}
+
+func encodePacket(buf []byte, st interface{}) (n int, err error) {
+ defer func() {
+ if r := recover(); r != nil {
+ if e, ok := r.(runtime.Error); ok && strings.HasPrefix(e.Error(), "runtime error: slice bounds out of range") {
+ err = ErrShortBuffer
+ } else {
+ panic(r)
+ }
+ }
+ }()
+
+ v := reflect.ValueOf(st)
+ if v.Kind() != reflect.Ptr || v.IsNil() {
+ return 0, ErrPtrExpected
+ }
+ return encodePacketValue(buf, v)
+}
+
+func encodePacketValue(buf []byte, v reflect.Value) (int, error) {
+ rv := v
+ for v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface {
+ v = v.Elem()
+ }
+
+ n := 0
+ switch v.Kind() {
+ default:
+ return n, ErrUnhandledFieldType
+ case reflect.Struct:
+ if en, ok := rv.Interface().(encoder); ok {
+ return en.Encode(buf)
+ } else if en, ok := v.Interface().(encoder); ok {
+ return en.Encode(buf)
+ } else {
+ for i := 0; i < v.NumField(); i++ {
+ field := v.Field(i)
+ n2, err := encodePacketValue(buf[n:], field)
+ n += n2
+ if err != nil {
+ return n, err
+ }
+ }
+ }
+ case reflect.Bool:
+ if v.Bool() {
+ buf[n] = 1
+ } else {
+ buf[n] = 0
+ }
+ n++
+ case reflect.Int32:
+ binary.BigEndian.PutUint32(buf[n:n+4], uint32(v.Int()))
+ n += 4
+ case reflect.Int64:
+ binary.BigEndian.PutUint64(buf[n:n+8], uint64(v.Int()))
+ n += 8
+ case reflect.String:
+ str := v.String()
+ binary.BigEndian.PutUint32(buf[n:n+4], uint32(len(str)))
+ copy(buf[n+4:n+4+len(str)], []byte(str))
+ n += 4 + len(str)
+ case reflect.Slice:
+ switch v.Type().Elem().Kind() {
+ default:
+ count := v.Len()
+ startN := n
+ n += 4
+ for i := 0; i < count; i++ {
+ n2, err := encodePacketValue(buf[n:], v.Index(i))
+ n += n2
+ if err != nil {
+ return n, err
+ }
+ }
+ binary.BigEndian.PutUint32(buf[startN:startN+4], uint32(count))
+ case reflect.Uint8:
+ if v.IsNil() {
+ binary.BigEndian.PutUint32(buf[n:n+4], uint32(0xffffffff))
+ n += 4
+ } else {
+ bytes := v.Bytes()
+ binary.BigEndian.PutUint32(buf[n:n+4], uint32(len(bytes)))
+ copy(buf[n+4:n+4+len(bytes)], bytes)
+ n += 4 + len(bytes)
+ }
+ }
+ }
+ return n, nil
+}
+
+func requestStructForOp(op int32) interface{} {
+ switch op {
+ case opClose:
+ return &closeRequest{}
+ case opCreate:
+ return &CreateRequest{}
+ case opDelete:
+ return &DeleteRequest{}
+ case opExists:
+ return &existsRequest{}
+ case opGetAcl:
+ return &getAclRequest{}
+ case opGetChildren:
+ return &getChildrenRequest{}
+ case opGetChildren2:
+ return &getChildren2Request{}
+ case opGetData:
+ return &getDataRequest{}
+ case opPing:
+ return &pingRequest{}
+ case opSetAcl:
+ return &setAclRequest{}
+ case opSetData:
+ return &SetDataRequest{}
+ case opSetWatches:
+ return &setWatchesRequest{}
+ case opSync:
+ return &syncRequest{}
+ case opSetAuth:
+ return &setAuthRequest{}
+ case opCheck:
+ return &CheckVersionRequest{}
+ case opMulti:
+ return &multiRequest{}
+ case opReconfig:
+ return &reconfigRequest{}
+ }
+ return nil
+}
diff --git a/vendor/github.com/samuel/go-zookeeper/zk/util.go b/vendor/github.com/samuel/go-zookeeper/zk/util.go
new file mode 100644
index 00000000..f40a5b15
--- /dev/null
+++ b/vendor/github.com/samuel/go-zookeeper/zk/util.go
@@ -0,0 +1,116 @@
+package zk
+
+import (
+ "crypto/sha1"
+ "encoding/base64"
+ "fmt"
+ "math/rand"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+)
+
+// AuthACL produces an ACL list containing a single ACL which uses the
+// provided permissions, with the scheme "auth", and ID "", which is used
+// by ZooKeeper to represent any authenticated user.
+func AuthACL(perms int32) []ACL {
+ return []ACL{{perms, "auth", ""}}
+}
+
+// WorldACL produces an ACL list containing a single ACL which uses the
+// provided permissions, with the scheme "world", and ID "anyone", which
+// is used by ZooKeeper to represent any user at all.
+func WorldACL(perms int32) []ACL {
+ return []ACL{{perms, "world", "anyone"}}
+}
+
+func DigestACL(perms int32, user, password string) []ACL {
+ userPass := []byte(fmt.Sprintf("%s:%s", user, password))
+ h := sha1.New()
+ if n, err := h.Write(userPass); err != nil || n != len(userPass) {
+ panic("SHA1 failed")
+ }
+ digest := base64.StdEncoding.EncodeToString(h.Sum(nil))
+ return []ACL{{perms, "digest", fmt.Sprintf("%s:%s", user, digest)}}
+}
+
+// FormatServers takes a slice of addresses, and makes sure they are in a format
+// that resembles host:port. If the server has no port provided, the
+// DefaultPort constant is added to the end.
+func FormatServers(servers []string) []string {
+ for i := range servers {
+ if !strings.Contains(servers[i], ":") {
+ servers[i] = servers[i] + ":" + strconv.Itoa(DefaultPort)
+ }
+ }
+ return servers
+}
+
+// stringShuffle performs a Fisher-Yates shuffle on a slice of strings
+func stringShuffle(s []string) {
+ for i := len(s) - 1; i > 0; i-- {
+ j := rand.Intn(i + 1)
+ s[i], s[j] = s[j], s[i]
+ }
+}
+
+// validatePath will make sure a path is valid before sending the request
+func validatePath(path string, isSequential bool) error {
+ if path == "" {
+ return ErrInvalidPath
+ }
+
+ if path[0] != '/' {
+ return ErrInvalidPath
+ }
+
+ n := len(path)
+ if n == 1 {
+ // path is just the root
+ return nil
+ }
+
+ if !isSequential && path[n-1] == '/' {
+ return ErrInvalidPath
+ }
+
+ // Start at rune 1 since we already know that the first character is
+ // a '/'.
+ for i, w := 1, 0; i < n; i += w {
+ r, width := utf8.DecodeRuneInString(path[i:])
+ switch {
+ case r == '\u0000':
+ return ErrInvalidPath
+ case r == '/':
+ last, _ := utf8.DecodeLastRuneInString(path[:i])
+ if last == '/' {
+ return ErrInvalidPath
+ }
+ case r == '.':
+ last, lastWidth := utf8.DecodeLastRuneInString(path[:i])
+
+ // Check for double dot
+ if last == '.' {
+ last, _ = utf8.DecodeLastRuneInString(path[:i-lastWidth])
+ }
+
+ if last == '/' {
+ if i+1 == n {
+ return ErrInvalidPath
+ }
+
+ next, _ := utf8.DecodeRuneInString(path[i+w:])
+ if next == '/' {
+ return ErrInvalidPath
+ }
+ }
+ case r >= '\u0000' && r <= '\u001f',
+ r >= '\u007f' && r <= '\u009f',
+ r >= '\uf000' && r <= '\uf8ff',
+ r >= '\ufff0' && r < '\uffff':
+ return ErrInvalidPath
+ }
+ w = width
+ }
+ return nil
+}
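A short sketch of the exported helpers above; it assumes the zk package's Perm* constants and a DefaultPort of 2181:

package main

import (
	"fmt"

	"github.com/samuel/go-zookeeper/zk"
)

func main() {
	// FormatServers appends the default port to addresses that lack one.
	fmt.Println(zk.FormatServers([]string{"zk1", "zk2:2182"})) // [zk1:2181 zk2:2182]

	// A world-readable ACL plus a digest ACL granting full access to one user.
	acl := zk.WorldACL(zk.PermRead)
	acl = append(acl, zk.DigestACL(zk.PermAll, "admin", "secret")...)
	fmt.Println(len(acl)) // 2
}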
diff --git a/vendor/github.com/syndtr/goleveldb/LICENSE b/vendor/github.com/syndtr/goleveldb/LICENSE
new file mode 100644
index 00000000..4a772d1a
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/LICENSE
@@ -0,0 +1,24 @@
+Copyright 2012 Suryandaru Triandana
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/batch.go b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go
new file mode 100644
index 00000000..22592000
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go
@@ -0,0 +1,349 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrBatchCorrupted records the reason for batch corruption. This error will be
+// wrapped with errors.ErrCorrupted.
+type ErrBatchCorrupted struct {
+ Reason string
+}
+
+func (e *ErrBatchCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason)
+}
+
+func newErrBatchCorrupted(reason string) error {
+ return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason})
+}
+
+const (
+ batchHeaderLen = 8 + 4
+ batchGrowRec = 3000
+ batchBufioSize = 16
+)
+
+// BatchReplay wraps basic batch operations.
+type BatchReplay interface {
+ Put(key, value []byte)
+ Delete(key []byte)
+}
+
+type batchIndex struct {
+ keyType keyType
+ keyPos, keyLen int
+ valuePos, valueLen int
+}
+
+func (index batchIndex) k(data []byte) []byte {
+ return data[index.keyPos : index.keyPos+index.keyLen]
+}
+
+func (index batchIndex) v(data []byte) []byte {
+ if index.valueLen != 0 {
+ return data[index.valuePos : index.valuePos+index.valueLen]
+ }
+ return nil
+}
+
+func (index batchIndex) kv(data []byte) (key, value []byte) {
+ return index.k(data), index.v(data)
+}
+
+// Batch is a write batch.
+type Batch struct {
+ data []byte
+ index []batchIndex
+
+ // internalLen is the sum of key/value pair lengths plus the 8-byte internal key overhead.
+ internalLen int
+}
+
+func (b *Batch) grow(n int) {
+ o := len(b.data)
+ if cap(b.data)-o < n {
+ div := 1
+ if len(b.index) > batchGrowRec {
+ div = len(b.index) / batchGrowRec
+ }
+ ndata := make([]byte, o, o+n+o/div)
+ copy(ndata, b.data)
+ b.data = ndata
+ }
+}
+
+func (b *Batch) appendRec(kt keyType, key, value []byte) {
+ n := 1 + binary.MaxVarintLen32 + len(key)
+ if kt == keyTypeVal {
+ n += binary.MaxVarintLen32 + len(value)
+ }
+ b.grow(n)
+ index := batchIndex{keyType: kt}
+ o := len(b.data)
+ data := b.data[:o+n]
+ data[o] = byte(kt)
+ o++
+ o += binary.PutUvarint(data[o:], uint64(len(key)))
+ index.keyPos = o
+ index.keyLen = len(key)
+ o += copy(data[o:], key)
+ if kt == keyTypeVal {
+ o += binary.PutUvarint(data[o:], uint64(len(value)))
+ index.valuePos = o
+ index.valueLen = len(value)
+ o += copy(data[o:], value)
+ }
+ b.data = data[:o]
+ b.index = append(b.index, index)
+ b.internalLen += index.keyLen + index.valueLen + 8
+}
+
+// Put appends 'put operation' of the given key/value pair to the batch.
+// It is safe to modify the contents of the argument after Put returns but not
+// before.
+func (b *Batch) Put(key, value []byte) {
+ b.appendRec(keyTypeVal, key, value)
+}
+
+// Delete appends 'delete operation' of the given key to the batch.
+// It is safe to modify the contents of the argument after Delete returns but
+// not before.
+func (b *Batch) Delete(key []byte) {
+ b.appendRec(keyTypeDel, key, nil)
+}
+
+// Dump dumps batch contents. The returned slice can be loaded into the
+// batch using the Load method.
+// The returned slice is not a copy, so its contents should not be
+// modified.
+func (b *Batch) Dump() []byte {
+ return b.data
+}
+
+// Load loads the given slice into the batch. Previous contents of the batch
+// will be discarded.
+// The given slice will not be copied and will be used as the batch buffer, so
+// it is not safe to modify the contents of the slice.
+func (b *Batch) Load(data []byte) error {
+ return b.decode(data, -1)
+}
+
+// Replay replays batch contents.
+func (b *Batch) Replay(r BatchReplay) error {
+ for _, index := range b.index {
+ switch index.keyType {
+ case keyTypeVal:
+ r.Put(index.k(b.data), index.v(b.data))
+ case keyTypeDel:
+ r.Delete(index.k(b.data))
+ }
+ }
+ return nil
+}
+
+// Len returns the number of records in the batch.
+func (b *Batch) Len() int {
+ return len(b.index)
+}
+
+// Reset resets the batch.
+func (b *Batch) Reset() {
+ b.data = b.data[:0]
+ b.index = b.index[:0]
+ b.internalLen = 0
+}
+
+func (b *Batch) replayInternal(fn func(i int, kt keyType, k, v []byte) error) error {
+ for i, index := range b.index {
+ if err := fn(i, index.keyType, index.k(b.data), index.v(b.data)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (b *Batch) append(p *Batch) {
+ ob := len(b.data)
+ oi := len(b.index)
+ b.data = append(b.data, p.data...)
+ b.index = append(b.index, p.index...)
+ b.internalLen += p.internalLen
+
+ // Updating index offset.
+ if ob != 0 {
+ for ; oi < len(b.index); oi++ {
+ index := &b.index[oi]
+ index.keyPos += ob
+ if index.valueLen != 0 {
+ index.valuePos += ob
+ }
+ }
+ }
+}
+
+func (b *Batch) decode(data []byte, expectedLen int) error {
+ b.data = data
+ b.index = b.index[:0]
+ b.internalLen = 0
+ err := decodeBatch(data, func(i int, index batchIndex) error {
+ b.index = append(b.index, index)
+ b.internalLen += index.keyLen + index.valueLen + 8
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ if expectedLen >= 0 && len(b.index) != expectedLen {
+ return newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", expectedLen, len(b.index)))
+ }
+ return nil
+}
+
+func (b *Batch) putMem(seq uint64, mdb *memdb.DB) error {
+ var ik []byte
+ for i, index := range b.index {
+ ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
+ if err := mdb.Put(ik, index.v(b.data)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (b *Batch) revertMem(seq uint64, mdb *memdb.DB) error {
+ var ik []byte
+ for i, index := range b.index {
+ ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
+ if err := mdb.Delete(ik); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func newBatch() interface{} {
+ return &Batch{}
+}
+
+func decodeBatch(data []byte, fn func(i int, index batchIndex) error) error {
+ var index batchIndex
+ for i, o := 0, 0; o < len(data); i++ {
+ // Key type.
+ index.keyType = keyType(data[o])
+ if index.keyType > keyTypeVal {
+ return newErrBatchCorrupted(fmt.Sprintf("bad record: invalid type %#x", uint(index.keyType)))
+ }
+ o++
+
+ // Key.
+ x, n := binary.Uvarint(data[o:])
+ o += n
+ if n <= 0 || o+int(x) > len(data) {
+ return newErrBatchCorrupted("bad record: invalid key length")
+ }
+ index.keyPos = o
+ index.keyLen = int(x)
+ o += index.keyLen
+
+ // Value.
+ if index.keyType == keyTypeVal {
+ x, n = binary.Uvarint(data[o:])
+ o += n
+ if n <= 0 || o+int(x) > len(data) {
+ return newErrBatchCorrupted("bad record: invalid value length")
+ }
+ index.valuePos = o
+ index.valueLen = int(x)
+ o += index.valueLen
+ } else {
+ index.valuePos = 0
+ index.valueLen = 0
+ }
+
+ if err := fn(i, index); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func decodeBatchToMem(data []byte, expectSeq uint64, mdb *memdb.DB) (seq uint64, batchLen int, err error) {
+ seq, batchLen, err = decodeBatchHeader(data)
+ if err != nil {
+ return 0, 0, err
+ }
+ if seq < expectSeq {
+ return 0, 0, newErrBatchCorrupted("invalid sequence number")
+ }
+ data = data[batchHeaderLen:]
+ var ik []byte
+ var decodedLen int
+ err = decodeBatch(data, func(i int, index batchIndex) error {
+ if i >= batchLen {
+ return newErrBatchCorrupted("invalid records length")
+ }
+ ik = makeInternalKey(ik, index.k(data), seq+uint64(i), index.keyType)
+ if err := mdb.Put(ik, index.v(data)); err != nil {
+ return err
+ }
+ decodedLen++
+ return nil
+ })
+ if err == nil && decodedLen != batchLen {
+ err = newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", batchLen, decodedLen))
+ }
+ return
+}
+
+func encodeBatchHeader(dst []byte, seq uint64, batchLen int) []byte {
+ dst = ensureBuffer(dst, batchHeaderLen)
+ binary.LittleEndian.PutUint64(dst, seq)
+ binary.LittleEndian.PutUint32(dst[8:], uint32(batchLen))
+ return dst
+}
+
+func decodeBatchHeader(data []byte) (seq uint64, batchLen int, err error) {
+ if len(data) < batchHeaderLen {
+ return 0, 0, newErrBatchCorrupted("too short")
+ }
+
+ seq = binary.LittleEndian.Uint64(data)
+ batchLen = int(binary.LittleEndian.Uint32(data[8:]))
+ if batchLen < 0 {
+ return 0, 0, newErrBatchCorrupted("invalid records length")
+ }
+ return
+}
+
+func batchesLen(batches []*Batch) int {
+ batchLen := 0
+ for _, batch := range batches {
+ batchLen += batch.Len()
+ }
+ return batchLen
+}
+
+func writeBatchesWithHeader(wr io.Writer, batches []*Batch, seq uint64) error {
+ if _, err := wr.Write(encodeBatchHeader(nil, seq, batchesLen(batches))); err != nil {
+ return err
+ }
+ for _, batch := range batches {
+ if _, err := wr.Write(batch.data); err != nil {
+ return err
+ }
+ }
+ return nil
+}
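A brief sketch of the Batch API above; DB.Write is assumed from the rest of the goleveldb package, and the path is a placeholder:

package main

import (
	"log"

	"github.com/syndtr/goleveldb/leveldb"
)

func main() {
	db, err := leveldb.OpenFile("/tmp/example-db", nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Accumulate several operations and apply them in a single write.
	batch := new(leveldb.Batch)
	batch.Put([]byte("k1"), []byte("v1"))
	batch.Put([]byte("k2"), []byte("v2"))
	batch.Delete([]byte("stale-key"))
	log.Printf("batch holds %d records", batch.Len())

	// DB.Write is assumed from the rest of the package.
	if err := db.Write(batch, nil); err != nil {
		log.Fatal(err)
	}
}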
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go
new file mode 100644
index 00000000..c36ad323
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go
@@ -0,0 +1,704 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package cache provides an interface and implementation of cache algorithms.
+package cache
+
+import (
+ "sync"
+ "sync/atomic"
+ "unsafe"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Cacher provides an interface for implementing caching functionality.
+// An implementation must be safe for concurrent use.
+type Cacher interface {
+ // Capacity returns cache capacity.
+ Capacity() int
+
+ // SetCapacity sets cache capacity.
+ SetCapacity(capacity int)
+
+ // Promote promotes the 'cache node'.
+ Promote(n *Node)
+
+ // Ban evicts the 'cache node' and prevents subsequent 'promote'.
+ Ban(n *Node)
+
+ // Evict evicts the 'cache node'.
+ Evict(n *Node)
+
+ // EvictNS evicts 'cache node' with the given namespace.
+ EvictNS(ns uint64)
+
+ // EvictAll evicts all 'cache node'.
+ EvictAll()
+
+ // Close closes the 'cache tree'
+ Close() error
+}
+
+// Value is a 'cacheable object'. It may implement util.Releaser; if
+// so, the Release method will be called once the object is released.
+type Value interface{}
+
+// NamespaceGetter provides a convenient wrapper for a namespace.
+type NamespaceGetter struct {
+ Cache *Cache
+ NS uint64
+}
+
+// Get simply calls Cache.Get() method.
+func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle {
+ return g.Cache.Get(g.NS, key, setFunc)
+}
+
+// The hash tables implementation is based on:
+// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu,
+// Kunlong Zhang, and Michael Spear.
+// ACM Symposium on Principles of Distributed Computing, Jul 2014.
+
+const (
+ mInitialSize = 1 << 4
+ mOverflowThreshold = 1 << 5
+ mOverflowGrowThreshold = 1 << 7
+)
+
+type mBucket struct {
+ mu sync.Mutex
+ node []*Node
+ frozen bool
+}
+
+func (b *mBucket) freeze() []*Node {
+ b.mu.Lock()
+ defer b.mu.Unlock()
+ if !b.frozen {
+ b.frozen = true
+ }
+ return b.node
+}
+
+func (b *mBucket) get(r *Cache, h *mNode, hash uint32, ns, key uint64, noset bool) (done, added bool, n *Node) {
+ b.mu.Lock()
+
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
+
+ // Scan the node.
+ for _, n := range b.node {
+ if n.hash == hash && n.ns == ns && n.key == key {
+ atomic.AddInt32(&n.ref, 1)
+ b.mu.Unlock()
+ return true, false, n
+ }
+ }
+
+ // Get only.
+ if noset {
+ b.mu.Unlock()
+ return true, false, nil
+ }
+
+ // Create node.
+ n = &Node{
+ r: r,
+ hash: hash,
+ ns: ns,
+ key: key,
+ ref: 1,
+ }
+ // Add node to bucket.
+ b.node = append(b.node, n)
+ bLen := len(b.node)
+ b.mu.Unlock()
+
+ // Update counter.
+ grow := atomic.AddInt32(&r.nodes, 1) >= h.growThreshold
+ if bLen > mOverflowThreshold {
+ grow = grow || atomic.AddInt32(&h.overflow, 1) >= mOverflowGrowThreshold
+ }
+
+ // Grow.
+ if grow && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) << 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+
+ return true, true, n
+}
+
+func (b *mBucket) delete(r *Cache, h *mNode, hash uint32, ns, key uint64) (done, deleted bool) {
+ b.mu.Lock()
+
+ if b.frozen {
+ b.mu.Unlock()
+ return
+ }
+
+ // Scan the node.
+ var (
+ n *Node
+ bLen int
+ )
+ for i := range b.node {
+ n = b.node[i]
+ if n.ns == ns && n.key == key {
+ if atomic.LoadInt32(&n.ref) == 0 {
+ deleted = true
+
+ // Call releaser.
+ if n.value != nil {
+ if r, ok := n.value.(util.Releaser); ok {
+ r.Release()
+ }
+ n.value = nil
+ }
+
+ // Remove node from bucket.
+ b.node = append(b.node[:i], b.node[i+1:]...)
+ bLen = len(b.node)
+ }
+ break
+ }
+ }
+ b.mu.Unlock()
+
+ if deleted {
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+
+ // Update counter.
+ atomic.AddInt32(&r.size, int32(n.size)*-1)
+ shrink := atomic.AddInt32(&r.nodes, -1) < h.shrinkThreshold
+ if bLen >= mOverflowThreshold {
+ atomic.AddInt32(&h.overflow, -1)
+ }
+
+ // Shrink.
+ if shrink && len(h.buckets) > mInitialSize && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
+ nhLen := len(h.buckets) >> 1
+ nh := &mNode{
+ buckets: make([]unsafe.Pointer, nhLen),
+ mask: uint32(nhLen) - 1,
+ pred: unsafe.Pointer(h),
+ growThreshold: int32(nhLen * mOverflowThreshold),
+ shrinkThreshold: int32(nhLen >> 1),
+ }
+ ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
+ if !ok {
+ panic("BUG: failed swapping head")
+ }
+ go nh.initBuckets()
+ }
+ }
+
+ return true, deleted
+}
+
+type mNode struct {
+ buckets []unsafe.Pointer // []*mBucket
+ mask uint32
+ pred unsafe.Pointer // *mNode
+ resizeInProgess int32
+
+ overflow int32
+ growThreshold int32
+ shrinkThreshold int32
+}
+
+func (n *mNode) initBucket(i uint32) *mBucket {
+ if b := (*mBucket)(atomic.LoadPointer(&n.buckets[i])); b != nil {
+ return b
+ }
+
+ p := (*mNode)(atomic.LoadPointer(&n.pred))
+ if p != nil {
+ var node []*Node
+ if n.mask > p.mask {
+ // Grow.
+ pb := (*mBucket)(atomic.LoadPointer(&p.buckets[i&p.mask]))
+ if pb == nil {
+ pb = p.initBucket(i & p.mask)
+ }
+ m := pb.freeze()
+ // Split nodes.
+ for _, x := range m {
+ if x.hash&n.mask == i {
+ node = append(node, x)
+ }
+ }
+ } else {
+ // Shrink.
+ pb0 := (*mBucket)(atomic.LoadPointer(&p.buckets[i]))
+ if pb0 == nil {
+ pb0 = p.initBucket(i)
+ }
+ pb1 := (*mBucket)(atomic.LoadPointer(&p.buckets[i+uint32(len(n.buckets))]))
+ if pb1 == nil {
+ pb1 = p.initBucket(i + uint32(len(n.buckets)))
+ }
+ m0 := pb0.freeze()
+ m1 := pb1.freeze()
+ // Merge nodes.
+ node = make([]*Node, 0, len(m0)+len(m1))
+ node = append(node, m0...)
+ node = append(node, m1...)
+ }
+ b := &mBucket{node: node}
+ if atomic.CompareAndSwapPointer(&n.buckets[i], nil, unsafe.Pointer(b)) {
+ if len(node) > mOverflowThreshold {
+ atomic.AddInt32(&n.overflow, int32(len(node)-mOverflowThreshold))
+ }
+ return b
+ }
+ }
+
+ return (*mBucket)(atomic.LoadPointer(&n.buckets[i]))
+}
+
+func (n *mNode) initBuckets() {
+ for i := range n.buckets {
+ n.initBucket(uint32(i))
+ }
+ atomic.StorePointer(&n.pred, nil)
+}
+
+// Cache is a 'cache map'.
+type Cache struct {
+ mu sync.RWMutex
+ mHead unsafe.Pointer // *mNode
+ nodes int32
+ size int32
+ cacher Cacher
+ closed bool
+}
+
+// NewCache creates a new 'cache map'. The cacher is optional and
+// may be nil.
+func NewCache(cacher Cacher) *Cache {
+ h := &mNode{
+ buckets: make([]unsafe.Pointer, mInitialSize),
+ mask: mInitialSize - 1,
+ growThreshold: int32(mInitialSize * mOverflowThreshold),
+ shrinkThreshold: 0,
+ }
+ for i := range h.buckets {
+ h.buckets[i] = unsafe.Pointer(&mBucket{})
+ }
+ r := &Cache{
+ mHead: unsafe.Pointer(h),
+ cacher: cacher,
+ }
+ return r
+}
+
+func (r *Cache) getBucket(hash uint32) (*mNode, *mBucket) {
+ h := (*mNode)(atomic.LoadPointer(&r.mHead))
+ i := hash & h.mask
+ b := (*mBucket)(atomic.LoadPointer(&h.buckets[i]))
+ if b == nil {
+ b = h.initBucket(i)
+ }
+ return h, b
+}
+
+func (r *Cache) delete(n *Node) bool {
+ for {
+ h, b := r.getBucket(n.hash)
+ done, deleted := b.delete(r, h, n.hash, n.ns, n.key)
+ if done {
+ return deleted
+ }
+ }
+}
+
+// Nodes returns the number of 'cache nodes' in the map.
+func (r *Cache) Nodes() int {
+ return int(atomic.LoadInt32(&r.nodes))
+}
+
+// Size returns the sum of 'cache node' sizes in the map.
+func (r *Cache) Size() int {
+ return int(atomic.LoadInt32(&r.size))
+}
+
+// Capacity returns cache capacity.
+func (r *Cache) Capacity() int {
+ if r.cacher == nil {
+ return 0
+ }
+ return r.cacher.Capacity()
+}
+
+// SetCapacity sets cache capacity.
+func (r *Cache) SetCapacity(capacity int) {
+ if r.cacher != nil {
+ r.cacher.SetCapacity(capacity)
+ }
+}
+
+// Get gets the 'cache node' with the given namespace and key.
+// If the 'cache node' is not found and setFunc is not nil, Get will atomically create
+// the 'cache node' by calling setFunc. Otherwise Get will return nil.
+//
+// The returned 'cache handle' should be released after use by calling Release
+// method.
+func (r *Cache) Get(ns, key uint64, setFunc func() (size int, value Value)) *Handle {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return nil
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, setFunc == nil)
+ if done {
+ if n != nil {
+ n.mu.Lock()
+ if n.value == nil {
+ if setFunc == nil {
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+
+ n.size, n.value = setFunc()
+ if n.value == nil {
+ n.size = 0
+ n.mu.Unlock()
+ n.unref()
+ return nil
+ }
+ atomic.AddInt32(&r.size, int32(n.size))
+ }
+ n.mu.Unlock()
+ if r.cacher != nil {
+ r.cacher.Promote(n)
+ }
+ return &Handle{unsafe.Pointer(n)}
+ }
+
+ break
+ }
+ }
+ return nil
+}
+
+// Delete removes and bans the 'cache node' with the given namespace and key.
+// A banned 'cache node' will never be inserted into the 'cache tree'. The ban
+// only applies to that particular 'cache node', so when a 'cache node'
+// is recreated it will not be banned.
+//
+// If onDel is not nil, then it will be executed if such a 'cache node'
+// doesn't exist or once the 'cache node' is released.
+//
+// Delete returns true if such a 'cache node' exists.
+func (r *Cache) Delete(ns, key uint64, onDel func()) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if onDel != nil {
+ n.mu.Lock()
+ n.onDel = append(n.onDel, onDel)
+ n.mu.Unlock()
+ }
+ if r.cacher != nil {
+ r.cacher.Ban(n)
+ }
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ if onDel != nil {
+ onDel()
+ }
+
+ return false
+}
+
+// Evict evicts 'cache node' with the given namespace and key. This will
+// simply call Cacher.Evict.
+//
+// Evict returns true if such a 'cache node' exists.
+func (r *Cache) Evict(ns, key uint64) bool {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return false
+ }
+
+ hash := murmur32(ns, key, 0xf00)
+ for {
+ h, b := r.getBucket(hash)
+ done, _, n := b.get(r, h, hash, ns, key, true)
+ if done {
+ if n != nil {
+ if r.cacher != nil {
+ r.cacher.Evict(n)
+ }
+ n.unref()
+ return true
+ }
+
+ break
+ }
+ }
+
+ return false
+}
+
+// EvictNS evicts 'cache node' with the given namespace. This will
+// simply call Cacher.EvictNS.
+func (r *Cache) EvictNS(ns uint64) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return
+ }
+
+ if r.cacher != nil {
+ r.cacher.EvictNS(ns)
+ }
+}
+
+// EvictAll evicts all 'cache node'. This will simply call Cacher.EvictAll.
+func (r *Cache) EvictAll() {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+ if r.closed {
+ return
+ }
+
+ if r.cacher != nil {
+ r.cacher.EvictAll()
+ }
+}
+
+// Close closes the 'cache map' and forcefully releases all 'cache node'.
+func (r *Cache) Close() error {
+ r.mu.Lock()
+ if !r.closed {
+ r.closed = true
+
+ h := (*mNode)(r.mHead)
+ h.initBuckets()
+
+ for i := range h.buckets {
+ b := (*mBucket)(h.buckets[i])
+ for _, n := range b.node {
+ // Call releaser.
+ if n.value != nil {
+ if r, ok := n.value.(util.Releaser); ok {
+ r.Release()
+ }
+ n.value = nil
+ }
+
+ // Call OnDel.
+ for _, f := range n.onDel {
+ f()
+ }
+ n.onDel = nil
+ }
+ }
+ }
+ r.mu.Unlock()
+
+ // Avoid deadlock.
+ if r.cacher != nil {
+ if err := r.cacher.Close(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// CloseWeak closes the 'cache map' and evicts all 'cache nodes' from the cacher,
+// but unlike Close it doesn't forcefully release the 'cache nodes'.
+func (r *Cache) CloseWeak() error {
+ r.mu.Lock()
+ if !r.closed {
+ r.closed = true
+ }
+ r.mu.Unlock()
+
+ // Avoid deadlock.
+ if r.cacher != nil {
+ r.cacher.EvictAll()
+ if err := r.cacher.Close(); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Node is a 'cache node'.
+type Node struct {
+ r *Cache
+
+ hash uint32
+ ns, key uint64
+
+ mu sync.Mutex
+ size int
+ value Value
+
+ ref int32
+ onDel []func()
+
+ CacheData unsafe.Pointer
+}
+
+// NS returns this 'cache node' namespace.
+func (n *Node) NS() uint64 {
+ return n.ns
+}
+
+// Key returns this 'cache node' key.
+func (n *Node) Key() uint64 {
+ return n.key
+}
+
+// Size returns this 'cache node' size.
+func (n *Node) Size() int {
+ return n.size
+}
+
+// Value returns this 'cache node' value.
+func (n *Node) Value() Value {
+ return n.value
+}
+
+// Ref returns this 'cache node' ref counter.
+func (n *Node) Ref() int32 {
+ return atomic.LoadInt32(&n.ref)
+}
+
+// GetHandle returns a handle for this 'cache node'.
+func (n *Node) GetHandle() *Handle {
+ if atomic.AddInt32(&n.ref, 1) <= 1 {
+ panic("BUG: Node.GetHandle on zero ref")
+ }
+ return &Handle{unsafe.Pointer(n)}
+}
+
+func (n *Node) unref() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.delete(n)
+ }
+}
+
+func (n *Node) unrefLocked() {
+ if atomic.AddInt32(&n.ref, -1) == 0 {
+ n.r.mu.RLock()
+ if !n.r.closed {
+ n.r.delete(n)
+ }
+ n.r.mu.RUnlock()
+ }
+}
+
+// Handle is a 'cache handle' of a 'cache node'.
+type Handle struct {
+ n unsafe.Pointer // *Node
+}
+
+// Value returns the value of the 'cache node'.
+func (h *Handle) Value() Value {
+ n := (*Node)(atomic.LoadPointer(&h.n))
+ if n != nil {
+ return n.value
+ }
+ return nil
+}
+
+// Release releases this 'cache handle'.
+// It is safe to call release multiple times.
+func (h *Handle) Release() {
+ nPtr := atomic.LoadPointer(&h.n)
+ if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) {
+ n := (*Node)(nPtr)
+ n.unrefLocked()
+ }
+}
+
+func murmur32(ns, key uint64, seed uint32) uint32 {
+ const (
+ m = uint32(0x5bd1e995)
+ r = 24
+ )
+
+ k1 := uint32(ns >> 32)
+ k2 := uint32(ns)
+ k3 := uint32(key >> 32)
+ k4 := uint32(key)
+
+ k1 *= m
+ k1 ^= k1 >> r
+ k1 *= m
+
+ k2 *= m
+ k2 ^= k2 >> r
+ k2 *= m
+
+ k3 *= m
+ k3 ^= k3 >> r
+ k3 *= m
+
+ k4 *= m
+ k4 ^= k4 >> r
+ k4 *= m
+
+ h := seed
+
+ h *= m
+ h ^= k1
+ h *= m
+ h ^= k2
+ h *= m
+ h ^= k3
+ h *= m
+ h ^= k4
+
+ h ^= h >> 13
+ h *= m
+ h ^= h >> 15
+
+ return h
+}
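A small sketch of the Get/Handle lifecycle described above, using a nil Cacher so that nodes live only while a handle references them:

package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/cache"
)

func main() {
	c := cache.NewCache(nil)
	defer c.Close()

	// The first Get creates the node via setFunc and returns a handle.
	h1 := c.Get(1, 42, func() (int, cache.Value) {
		return len("hello"), "hello"
	})
	fmt.Println(h1.Value()) // hello

	// While h1 is held, a lookup-only Get (nil setFunc) finds the same node.
	if h2 := c.Get(1, 42, nil); h2 != nil {
		fmt.Println(h2.Value()) // hello
		h2.Release()
	}

	// Releasing the last handle drops the node, since no Cacher retains it.
	h1.Release()
	fmt.Println(c.Get(1, 42, nil) == nil) // true
}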
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go
new file mode 100644
index 00000000..d9a84cde
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go
@@ -0,0 +1,195 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package cache
+
+import (
+ "sync"
+ "unsafe"
+)
+
+type lruNode struct {
+ n *Node
+ h *Handle
+ ban bool
+
+ next, prev *lruNode
+}
+
+func (n *lruNode) insert(at *lruNode) {
+ x := at.next
+ at.next = n
+ n.prev = at
+ n.next = x
+ x.prev = n
+}
+
+func (n *lruNode) remove() {
+ if n.prev != nil {
+ n.prev.next = n.next
+ n.next.prev = n.prev
+ n.prev = nil
+ n.next = nil
+ } else {
+ panic("BUG: removing removed node")
+ }
+}
+
+type lru struct {
+ mu sync.Mutex
+ capacity int
+ used int
+ recent lruNode
+}
+
+func (r *lru) reset() {
+ r.recent.next = &r.recent
+ r.recent.prev = &r.recent
+ r.used = 0
+}
+
+func (r *lru) Capacity() int {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ return r.capacity
+}
+
+func (r *lru) SetCapacity(capacity int) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ r.capacity = capacity
+ for r.used > r.capacity {
+ rn := r.recent.prev
+ if rn == nil {
+ panic("BUG: invalid LRU used or capacity counter")
+ }
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ r.mu.Unlock()
+
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) Promote(n *Node) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ if n.CacheData == nil {
+ if n.Size() <= r.capacity {
+ rn := &lruNode{n: n, h: n.GetHandle()}
+ rn.insert(&r.recent)
+ n.CacheData = unsafe.Pointer(rn)
+ r.used += n.Size()
+
+ for r.used > r.capacity {
+ rn := r.recent.prev
+ if rn == nil {
+ panic("BUG: invalid LRU used or capacity counter")
+ }
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ }
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ rn.remove()
+ rn.insert(&r.recent)
+ }
+ }
+ r.mu.Unlock()
+
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) Ban(n *Node) {
+ r.mu.Lock()
+ if n.CacheData == nil {
+ n.CacheData = unsafe.Pointer(&lruNode{n: n, ban: true})
+ } else {
+ rn := (*lruNode)(n.CacheData)
+ if !rn.ban {
+ rn.remove()
+ rn.ban = true
+ r.used -= rn.n.Size()
+ r.mu.Unlock()
+
+ rn.h.Release()
+ rn.h = nil
+ return
+ }
+ }
+ r.mu.Unlock()
+}
+
+func (r *lru) Evict(n *Node) {
+ r.mu.Lock()
+ rn := (*lruNode)(n.CacheData)
+ if rn == nil || rn.ban {
+ r.mu.Unlock()
+ return
+ }
+ n.CacheData = nil
+ r.mu.Unlock()
+
+ rn.h.Release()
+}
+
+func (r *lru) EvictNS(ns uint64) {
+ var evicted []*lruNode
+
+ r.mu.Lock()
+ for e := r.recent.prev; e != &r.recent; {
+ rn := e
+ e = e.prev
+ if rn.n.NS() == ns {
+ rn.remove()
+ rn.n.CacheData = nil
+ r.used -= rn.n.Size()
+ evicted = append(evicted, rn)
+ }
+ }
+ r.mu.Unlock()
+
+ for _, rn := range evicted {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) EvictAll() {
+ r.mu.Lock()
+ back := r.recent.prev
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.n.CacheData = nil
+ }
+ r.reset()
+ r.mu.Unlock()
+
+ for rn := back; rn != &r.recent; rn = rn.prev {
+ rn.h.Release()
+ }
+}
+
+func (r *lru) Close() error {
+ return nil
+}
+
+// NewLRU creates a new LRU cache.
+func NewLRU(capacity int) Cacher {
+ r := &lru{capacity: capacity}
+ r.reset()
+ return r
+}
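Plugging the LRU policy above into the cache map; the capacity is expressed in the same units as the sizes returned by setFunc:

package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/cache"
)

func main() {
	// Retain promoted nodes up to roughly 1 KiB of accumulated size.
	c := cache.NewCache(cache.NewLRU(1024))
	defer c.Close()

	for i := uint64(0); i < 10; i++ {
		h := c.Get(0, i, func() (int, cache.Value) {
			return 100, fmt.Sprintf("value-%d", i)
		})
		// The LRU keeps its own handle, so releasing ours does not drop the node.
		h.Release()
	}
	fmt.Println("nodes:", c.Nodes(), "size:", c.Size()) // nodes: 10 size: 1000
}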
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go
new file mode 100644
index 00000000..448402b8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go
@@ -0,0 +1,67 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+)
+
+type iComparer struct {
+ ucmp comparer.Comparer
+}
+
+func (icmp *iComparer) uName() string {
+ return icmp.ucmp.Name()
+}
+
+func (icmp *iComparer) uCompare(a, b []byte) int {
+ return icmp.ucmp.Compare(a, b)
+}
+
+func (icmp *iComparer) uSeparator(dst, a, b []byte) []byte {
+ return icmp.ucmp.Separator(dst, a, b)
+}
+
+func (icmp *iComparer) uSuccessor(dst, b []byte) []byte {
+ return icmp.ucmp.Successor(dst, b)
+}
+
+func (icmp *iComparer) Name() string {
+ return icmp.uName()
+}
+
+func (icmp *iComparer) Compare(a, b []byte) int {
+ x := icmp.uCompare(internalKey(a).ukey(), internalKey(b).ukey())
+ if x == 0 {
+ if m, n := internalKey(a).num(), internalKey(b).num(); m > n {
+ return -1
+ } else if m < n {
+ return 1
+ }
+ }
+ return x
+}
+
+func (icmp *iComparer) Separator(dst, a, b []byte) []byte {
+ ua, ub := internalKey(a).ukey(), internalKey(b).ukey()
+ dst = icmp.uSeparator(dst, ua, ub)
+ if dst != nil && len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 {
+ // Append earliest possible number.
+ return append(dst, keyMaxNumBytes...)
+ }
+ return nil
+}
+
+func (icmp *iComparer) Successor(dst, b []byte) []byte {
+ ub := internalKey(b).ukey()
+ dst = icmp.uSuccessor(dst, ub)
+ if dst != nil && len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 {
+ // Append earliest possible number.
+ return append(dst, keyMaxNumBytes...)
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go
new file mode 100644
index 00000000..abf9fb65
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go
@@ -0,0 +1,51 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package comparer
+
+import "bytes"
+
+type bytesComparer struct{}
+
+func (bytesComparer) Compare(a, b []byte) int {
+ return bytes.Compare(a, b)
+}
+
+func (bytesComparer) Name() string {
+ return "leveldb.BytewiseComparator"
+}
+
+func (bytesComparer) Separator(dst, a, b []byte) []byte {
+ i, n := 0, len(a)
+ if n > len(b) {
+ n = len(b)
+ }
+ for ; i < n && a[i] == b[i]; i++ {
+ }
+ if i >= n {
+ // Do not shorten if one string is a prefix of the other
+ } else if c := a[i]; c < 0xff && c+1 < b[i] {
+ dst = append(dst, a[:i+1]...)
+ dst[len(dst)-1]++
+ return dst
+ }
+ return nil
+}
+
+func (bytesComparer) Successor(dst, b []byte) []byte {
+ for i, c := range b {
+ if c != 0xff {
+ dst = append(dst, b[:i+1]...)
+ dst[len(dst)-1]++
+ return dst
+ }
+ }
+ return nil
+}
+
+// DefaultComparer is the default implementation of the Comparer interface.
+// It uses the natural ordering, consistent with bytes.Compare.
+var DefaultComparer = bytesComparer{}
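A worked example of the shortening behaviour above: Separator bumps the first differing byte when it can, and Successor bumps the first byte that is not 0xff:

package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/comparer"
)

func main() {
	cmp := comparer.DefaultComparer

	// "abc" <= x < "abx" holds for the shortened separator "abd".
	fmt.Printf("%q\n", cmp.Separator(nil, []byte("abc"), []byte("abx"))) // "abd"

	// No shortening when one key is a prefix of the other.
	fmt.Println(cmp.Separator(nil, []byte("ab"), []byte("abc")) == nil) // true

	// "b" >= "abc", so it can stand in for "abc" as a short upper bound.
	fmt.Printf("%q\n", cmp.Successor(nil, []byte("abc"))) // "b"
}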
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
new file mode 100644
index 00000000..2c522db2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
@@ -0,0 +1,57 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package comparer provides interface and implementation for ordering
+// sets of data.
+package comparer
+
+// BasicComparer is the interface that wraps the basic Compare method.
+type BasicComparer interface {
+ // Compare returns -1, 0, or +1 depending on whether a is 'less than',
+ // 'equal to' or 'greater than' b. The two arguments can only be 'equal'
+ // if their contents are exactly equal. Furthermore, the empty slice
+ // must be 'less than' any non-empty slice.
+ Compare(a, b []byte) int
+}
+
+// Comparer defines a total ordering over the space of []byte keys: a 'less
+// than' relationship.
+type Comparer interface {
+ BasicComparer
+
+ // Name returns name of the comparer.
+ //
+ // The Level-DB on-disk format stores the comparer name, and opening a
+ // database with a different comparer from the one it was created with
+ // will result in an error.
+ //
+ // An implementation should switch to a new name whenever the comparer implementation
+ // changes in a way that will cause the relative ordering of any two keys
+ // to change.
+ //
+ // Names starting with "leveldb." are reserved and should not be used
+ // by any users of this package.
+ Name() string
+
+ // Below are advanced functions used to reduce the space requirements
+ // for internal data structures such as index blocks.
+
+ // Separator appends a sequence of bytes x to dst such that a <= x && x < b,
+ // where 'less than' is consistent with Compare. An implementation should
+ // return nil if x is equal to a.
+ //
+ // The contents of a and b must not be modified in any way. Doing so
+ // may corrupt the internal state.
+ Separator(dst, a, b []byte) []byte
+
+ // Successor appends a sequence of bytes x to dst such that x >= b, where
+ // 'less than' is consistent with Compare. An implementation should return
+ // nil if x is equal to b.
+ //
+ // The contents of b must not be modified in any way. Doing so may
+ // corrupt the internal state.
+ Successor(dst, b []byte) []byte
+}
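A minimal custom Comparer sketch against the contract above. Length-first ordering keeps the empty key smallest and treats keys as equal only when their contents are identical, and returning nil from Separator and Successor simply disables index-key shortening:

package main

import (
	"bytes"
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/comparer"
)

// lengthFirstComparer orders keys by length and then bytewise, so the empty
// key sorts first and keys compare equal only when identical.
type lengthFirstComparer struct{}

func (lengthFirstComparer) Compare(a, b []byte) int {
	if len(a) != len(b) {
		if len(a) < len(b) {
			return -1
		}
		return 1
	}
	return bytes.Compare(a, b)
}

func (lengthFirstComparer) Name() string { return "example.LengthFirstComparator" }

// Returning nil is always permitted; it only disables index-key shortening.
func (lengthFirstComparer) Separator(dst, a, b []byte) []byte { return nil }
func (lengthFirstComparer) Successor(dst, b []byte) []byte    { return nil }

func main() {
	var cmp comparer.Comparer = lengthFirstComparer{}
	fmt.Println(cmp.Compare([]byte(""), []byte("a")))     // -1: the empty key sorts first
	fmt.Println(cmp.Compare([]byte("zz"), []byte("aaa"))) // -1: shorter keys sort first
}

In practice such a comparer would be supplied through the options' Comparer field from the opt package, which is assumed here rather than shown in this section.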
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db.go b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
new file mode 100644
index 00000000..90fedf7b
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
@@ -0,0 +1,1179 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "container/list"
+ "fmt"
+ "io"
+ "os"
+ "runtime"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/table"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// DB is a LevelDB database.
+type DB struct {
+ // Need 64-bit alignment.
+ seq uint64
+
+ // Stats. Need 64-bit alignment.
+ cWriteDelay int64 // The cumulative duration of write delays
+ cWriteDelayN int32 // The cumulative number of write delays
+ inWritePaused int32 // Indicates whether write operations are paused by compaction
+ aliveSnaps, aliveIters int32
+
+ // Session.
+ s *session
+
+ // MemDB.
+ memMu sync.RWMutex
+ memPool chan *memdb.DB
+ mem, frozenMem *memDB
+ journal *journal.Writer
+ journalWriter storage.Writer
+ journalFd storage.FileDesc
+ frozenJournalFd storage.FileDesc
+ frozenSeq uint64
+
+ // Snapshot.
+ snapsMu sync.Mutex
+ snapsList *list.List
+
+ // Write.
+ batchPool sync.Pool
+ writeMergeC chan writeMerge
+ writeMergedC chan bool
+ writeLockC chan struct{}
+ writeAckC chan error
+ writeDelay time.Duration
+ writeDelayN int
+ tr *Transaction
+
+ // Compaction.
+ compCommitLk sync.Mutex
+ tcompCmdC chan cCmd
+ tcompPauseC chan chan<- struct{}
+ mcompCmdC chan cCmd
+ compErrC chan error
+ compPerErrC chan error
+ compErrSetC chan error
+ compWriteLocking bool
+ compStats cStats
+ memdbMaxLevel int // For testing.
+
+ // Close.
+ closeW sync.WaitGroup
+ closeC chan struct{}
+ closed uint32
+ closer io.Closer
+}
+
+func openDB(s *session) (*DB, error) {
+ s.log("db@open opening")
+ start := time.Now()
+ db := &DB{
+ s: s,
+ // Initial sequence
+ seq: s.stSeqNum,
+ // MemDB
+ memPool: make(chan *memdb.DB, 1),
+ // Snapshot
+ snapsList: list.New(),
+ // Write
+ batchPool: sync.Pool{New: newBatch},
+ writeMergeC: make(chan writeMerge),
+ writeMergedC: make(chan bool),
+ writeLockC: make(chan struct{}, 1),
+ writeAckC: make(chan error),
+ // Compaction
+ tcompCmdC: make(chan cCmd),
+ tcompPauseC: make(chan chan<- struct{}),
+ mcompCmdC: make(chan cCmd),
+ compErrC: make(chan error),
+ compPerErrC: make(chan error),
+ compErrSetC: make(chan error),
+ // Close
+ closeC: make(chan struct{}),
+ }
+
+ // Read-only mode.
+ readOnly := s.o.GetReadOnly()
+
+ if readOnly {
+ // Recover journals (read-only mode).
+ if err := db.recoverJournalRO(); err != nil {
+ return nil, err
+ }
+ } else {
+ // Recover journals.
+ if err := db.recoverJournal(); err != nil {
+ return nil, err
+ }
+
+ // Remove any obsolete files.
+ if err := db.checkAndCleanFiles(); err != nil {
+ // Close journal.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ }
+ return nil, err
+ }
+
+ }
+
+ // Doesn't need to be included in the wait group.
+ go db.compactionError()
+ go db.mpoolDrain()
+
+ if readOnly {
+ db.SetReadOnly()
+ } else {
+ db.closeW.Add(2)
+ go db.tCompaction()
+ go db.mCompaction()
+ // go db.jWriter()
+ }
+
+ s.logf("db@open done T·%v", time.Since(start))
+
+ runtime.SetFinalizer(db, (*DB).Close)
+ return db, nil
+}
+
+// Open opens or creates a DB for the given storage.
+// The DB will be created if it does not exist, unless ErrorIfMissing is true.
+// Also, if ErrorIfExist is true and the DB exists, Open will return an
+// os.ErrExist error.
+//
+// Open will return an error of type ErrCorrupted if corruption is
+// detected in the DB. Use errors.IsCorrupted to test whether an error is
+// due to corruption. A corrupted DB can be recovered with the Recover function.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling the Close method.
+func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
+ if err != nil {
+ return
+ }
+ defer func() {
+ if err != nil {
+ s.close()
+ s.release()
+ }
+ }()
+
+ err = s.recover()
+ if err != nil {
+ if !os.IsNotExist(err) || s.o.GetErrorIfMissing() || s.o.GetReadOnly() {
+ return
+ }
+ err = s.create()
+ if err != nil {
+ return
+ }
+ } else if s.o.GetErrorIfExist() {
+ err = os.ErrExist
+ return
+ }
+
+ return openDB(s)
+}
+
+// OpenFile opens or creates a DB for the given path.
+// The DB will be created if it does not exist, unless ErrorIfMissing is true.
+// Also, if ErrorIfExist is true and the DB exists, OpenFile will return an
+// os.ErrExist error.
+//
+// OpenFile uses the standard file-system backed storage implementation as
+// described in the leveldb/storage package.
+//
+// OpenFile will return an error of type ErrCorrupted if corruption is
+// detected in the DB. Use errors.IsCorrupted to test whether an error is
+// due to corruption. A corrupted DB can be recovered with the Recover function.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling the Close method.
+func OpenFile(path string, o *opt.Options) (db *DB, err error) {
+ stor, err := storage.OpenFile(path, o.GetReadOnly())
+ if err != nil {
+ return
+ }
+ db, err = Open(stor, o)
+ if err != nil {
+ stor.Close()
+ } else {
+ db.closer = stor
+ }
+ return
+}
+
+// Recover recovers and opens a DB with missing or corrupted manifest files
+// for the given storage. It will ignore any manifest files, valid or not.
+// The DB must already exist or it will return an error.
+// Also, Recover will ignore the ErrorIfMissing and ErrorIfExist options.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling the Close method.
+func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
+ s, err := newSession(stor, o)
+ if err != nil {
+ return
+ }
+ defer func() {
+ if err != nil {
+ s.close()
+ s.release()
+ }
+ }()
+
+ err = recoverTable(s, o)
+ if err != nil {
+ return
+ }
+ return openDB(s)
+}
+
+// RecoverFile recovers and opens a DB with missing or corrupted manifest files
+// for the given path. It will ignore any manifest files, valid or not.
+// The DB must already exist or it will return an error.
+// Also, RecoverFile will ignore the ErrorIfMissing and ErrorIfExist options.
+//
+// RecoverFile uses the standard file-system backed storage implementation as described
+// in the leveldb/storage package.
+//
+// The returned DB instance is safe for concurrent use.
+// The DB must be closed after use, by calling the Close method.
+func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
+ stor, err := storage.OpenFile(path, false)
+ if err != nil {
+ return
+ }
+ db, err = Recover(stor, o)
+ if err != nil {
+ stor.Close()
+ } else {
+ db.closer = stor
+ }
+ return
+}
+
+func recoverTable(s *session, o *opt.Options) error {
+ o = dupOptions(o)
+ // Mask StrictReader, to let StrictRecovery do its job.
+ o.Strict &= ^opt.StrictReader
+
+ // Get all tables and sort them by file number.
+ fds, err := s.stor.List(storage.TypeTable)
+ if err != nil {
+ return err
+ }
+ sortFds(fds)
+
+ var (
+ maxSeq uint64
+ recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int
+
+ // We will drop corrupted tables.
+ strict = o.GetStrict(opt.StrictRecovery)
+ noSync = o.GetNoSync()
+
+ rec = &sessionRecord{}
+ bpool = util.NewBufferPool(o.GetBlockSize() + 5)
+ )
+ buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
+ tmpFd = s.newTemp()
+ writer, err := s.stor.Create(tmpFd)
+ if err != nil {
+ return
+ }
+ defer func() {
+ writer.Close()
+ if err != nil {
+ s.stor.Remove(tmpFd)
+ tmpFd = storage.FileDesc{}
+ }
+ }()
+
+ // Copy entries.
+ tw := table.NewWriter(writer, o)
+ for iter.Next() {
+ key := iter.Key()
+ if validInternalKey(key) {
+ err = tw.Append(key, iter.Value())
+ if err != nil {
+ return
+ }
+ }
+ }
+ err = iter.Error()
+ if err != nil && !errors.IsCorrupted(err) {
+ return
+ }
+ err = tw.Close()
+ if err != nil {
+ return
+ }
+ if !noSync {
+ err = writer.Sync()
+ if err != nil {
+ return
+ }
+ }
+ size = int64(tw.BytesLen())
+ return
+ }
+ recoverTable := func(fd storage.FileDesc) error {
+ s.logf("table@recovery recovering @%d", fd.Num)
+ reader, err := s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+ var closed bool
+ defer func() {
+ if !closed {
+ reader.Close()
+ }
+ }()
+
+ // Get file size.
+ size, err := reader.Seek(0, 2)
+ if err != nil {
+ return err
+ }
+
+ var (
+ tSeq uint64
+ tgoodKey, tcorruptedKey, tcorruptedBlock int
+ imin, imax []byte
+ )
+ tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
+ if err != nil {
+ return err
+ }
+ iter := tr.NewIterator(nil, nil)
+ if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
+ itererr.SetErrorCallback(func(err error) {
+ if errors.IsCorrupted(err) {
+ s.logf("table@recovery block corruption @%d %q", fd.Num, err)
+ tcorruptedBlock++
+ }
+ })
+ }
+
+ // Scan the table.
+ for iter.Next() {
+ key := iter.Key()
+ _, seq, _, kerr := parseInternalKey(key)
+ if kerr != nil {
+ tcorruptedKey++
+ continue
+ }
+ tgoodKey++
+ if seq > tSeq {
+ tSeq = seq
+ }
+ if imin == nil {
+ imin = append([]byte{}, key...)
+ }
+ imax = append(imax[:0], key...)
+ }
+ if err := iter.Error(); err != nil && !errors.IsCorrupted(err) {
+ iter.Release()
+ return err
+ }
+ iter.Release()
+
+ goodKey += tgoodKey
+ corruptedKey += tcorruptedKey
+ corruptedBlock += tcorruptedBlock
+
+ if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
+ droppedTable++
+ s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
+ return nil
+ }
+
+ if tgoodKey > 0 {
+ if tcorruptedKey > 0 || tcorruptedBlock > 0 {
+ // Rebuild the table.
+ s.logf("table@recovery rebuilding @%d", fd.Num)
+ iter := tr.NewIterator(nil, nil)
+ tmpFd, newSize, err := buildTable(iter)
+ iter.Release()
+ if err != nil {
+ return err
+ }
+ closed = true
+ reader.Close()
+ if err := s.stor.Rename(tmpFd, fd); err != nil {
+ return err
+ }
+ size = newSize
+ }
+ if tSeq > maxSeq {
+ maxSeq = tSeq
+ }
+ recoveredKey += tgoodKey
+ // Add table to level 0.
+ rec.addTable(0, fd.Num, size, imin, imax)
+ s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
+ } else {
+ droppedTable++
+ s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
+ }
+
+ return nil
+ }
+
+ // Recover all tables.
+ if len(fds) > 0 {
+ s.logf("table@recovery F·%d", len(fds))
+
+ // Mark file number as used.
+ s.markFileNum(fds[len(fds)-1].Num)
+
+ for _, fd := range fds {
+ if err := recoverTable(fd); err != nil {
+ return err
+ }
+ }
+
+ s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
+ }
+
+ // Set sequence number.
+ rec.setSeqNum(maxSeq)
+
+ // Create new manifest.
+ if err := s.create(); err != nil {
+ return err
+ }
+
+ // Commit.
+ return s.commit(rec)
+}
+
+func (db *DB) recoverJournal() error {
+ // Get all journals and sort them by file number.
+ rawFds, err := db.s.stor.List(storage.TypeJournal)
+ if err != nil {
+ return err
+ }
+ sortFds(rawFds)
+
+ // Journals that will be recovered.
+ var fds []storage.FileDesc
+ for _, fd := range rawFds {
+ if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
+ fds = append(fds, fd)
+ }
+ }
+
+ var (
+ ofd storage.FileDesc // Obsolete file.
+ rec = &sessionRecord{}
+ )
+
+ // Recover journals.
+ if len(fds) > 0 {
+ db.logf("journal@recovery F·%d", len(fds))
+
+ // Mark file number as used.
+ db.s.markFileNum(fds[len(fds)-1].Num)
+
+ var (
+ // Options.
+ strict = db.s.o.GetStrict(opt.StrictJournal)
+ checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
+ writeBuffer = db.s.o.GetWriteBuffer()
+
+ jr *journal.Reader
+ mdb = memdb.New(db.s.icmp, writeBuffer)
+ buf = &util.Buffer{}
+ batchSeq uint64
+ batchLen int
+ )
+
+ for _, fd := range fds {
+ db.logf("journal@recovery recovering @%d", fd.Num)
+
+ fr, err := db.s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+
+ // Create or reset journal reader instance.
+ if jr == nil {
+ jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
+ } else {
+ jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
+ }
+
+ // Flush memdb and remove obsolete journal file.
+ if !ofd.Zero() {
+ if mdb.Len() > 0 {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ fr.Close()
+ return err
+ }
+ }
+
+ rec.setJournalNum(fd.Num)
+ rec.setSeqNum(db.seq)
+ if err := db.s.commit(rec); err != nil {
+ fr.Close()
+ return err
+ }
+ rec.resetAddedTables()
+
+ db.s.stor.Remove(ofd)
+ ofd = storage.FileDesc{}
+ }
+
+ // Replay journal to memdb.
+ mdb.Reset()
+ for {
+ r, err := jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ buf.Reset()
+ if _, err := buf.ReadFrom(r); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ // This error is returned due to corruption, with strict == false.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+ batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
+ if err != nil {
+ if !strict && errors.IsCorrupted(err) {
+ db.s.logf("journal error: %v (skipped)", err)
+ // We won't apply sequence number as it might be corrupted.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ // Save sequence number.
+ db.seq = batchSeq + uint64(batchLen)
+
+ // Flush it if large enough.
+ if mdb.Size() >= writeBuffer {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ fr.Close()
+ return err
+ }
+
+ mdb.Reset()
+ }
+ }
+
+ fr.Close()
+ ofd = fd
+ }
+
+ // Flush the last memdb.
+ if mdb.Len() > 0 {
+ if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
+ return err
+ }
+ }
+ }
+
+ // Create a new journal.
+ if _, err := db.newMem(0); err != nil {
+ return err
+ }
+
+ // Commit.
+ rec.setJournalNum(db.journalFd.Num)
+ rec.setSeqNum(db.seq)
+ if err := db.s.commit(rec); err != nil {
+ // Close journal on error.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ }
+ return err
+ }
+
+ // Remove the last obsolete journal file.
+ if !ofd.Zero() {
+ db.s.stor.Remove(ofd)
+ }
+
+ return nil
+}
+
+func (db *DB) recoverJournalRO() error {
+ // Get all journals and sort them by file number.
+ rawFds, err := db.s.stor.List(storage.TypeJournal)
+ if err != nil {
+ return err
+ }
+ sortFds(rawFds)
+
+ // Journals that will be recovered.
+ var fds []storage.FileDesc
+ for _, fd := range rawFds {
+ if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
+ fds = append(fds, fd)
+ }
+ }
+
+ var (
+ // Options.
+ strict = db.s.o.GetStrict(opt.StrictJournal)
+ checksum = db.s.o.GetStrict(opt.StrictJournalChecksum)
+ writeBuffer = db.s.o.GetWriteBuffer()
+
+ mdb = memdb.New(db.s.icmp, writeBuffer)
+ )
+
+ // Recover journals.
+ if len(fds) > 0 {
+ db.logf("journal@recovery RO·Mode F·%d", len(fds))
+
+ var (
+ jr *journal.Reader
+ buf = &util.Buffer{}
+ batchSeq uint64
+ batchLen int
+ )
+
+ for _, fd := range fds {
+ db.logf("journal@recovery recovering @%d", fd.Num)
+
+ fr, err := db.s.stor.Open(fd)
+ if err != nil {
+ return err
+ }
+
+ // Create or reset journal reader instance.
+ if jr == nil {
+ jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
+ } else {
+ jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
+ }
+
+ // Replay journal to memdb.
+ for {
+ r, err := jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ buf.Reset()
+ if _, err := buf.ReadFrom(r); err != nil {
+ if err == io.ErrUnexpectedEOF {
+ // This error is returned due to corruption, with strict == false.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+ batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
+ if err != nil {
+ if !strict && errors.IsCorrupted(err) {
+ db.s.logf("journal error: %v (skipped)", err)
+ // We won't apply sequence number as it might be corrupted.
+ continue
+ }
+
+ fr.Close()
+ return errors.SetFd(err, fd)
+ }
+
+ // Save sequence number.
+ db.seq = batchSeq + uint64(batchLen)
+ }
+
+ fr.Close()
+ }
+ }
+
+ // Set memDB.
+ db.mem = &memDB{db: db, DB: mdb, ref: 1}
+
+ return nil
+}
+
+func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
+ mk, mv, err := mdb.Find(ikey)
+ if err == nil {
+ ukey, _, kt, kerr := parseInternalKey(mk)
+ if kerr != nil {
+ // Shouldn't have happened.
+ panic(kerr)
+ }
+ if icmp.uCompare(ukey, ikey.ukey()) == 0 {
+ if kt == keyTypeDel {
+ return true, nil, ErrNotFound
+ }
+ return true, mv, nil
+
+ }
+ } else if err != ErrNotFound {
+ return true, nil, err
+ }
+ return
+}
+
+func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
+ ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
+
+ if auxm != nil {
+ if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
+ return append([]byte{}, mv...), me
+ }
+ }
+
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
+ if m == nil {
+ continue
+ }
+ defer m.decref()
+
+ if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
+ return append([]byte{}, mv...), me
+ }
+ }
+
+ v := db.s.version()
+ value, cSched, err := v.get(auxt, ikey, ro, false)
+ v.release()
+ if cSched {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ return
+}
+
+func nilIfNotFound(err error) error {
+ if err == ErrNotFound {
+ return nil
+ }
+ return err
+}
+
+func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
+ ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
+
+ if auxm != nil {
+ if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
+ return me == nil, nilIfNotFound(me)
+ }
+ }
+
+ em, fm := db.getMems()
+ for _, m := range [...]*memDB{em, fm} {
+ if m == nil {
+ continue
+ }
+ defer m.decref()
+
+ if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
+ return me == nil, nilIfNotFound(me)
+ }
+ }
+
+ v := db.s.version()
+ _, cSched, err := v.get(auxt, ikey, ro, true)
+ v.release()
+ if cSched {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ if err == nil {
+ ret = true
+ } else if err == ErrNotFound {
+ err = nil
+ }
+ return
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The returned slice is its own copy; it is safe to modify the contents
+// of the returned slice.
+// It is safe to modify the contents of the argument after Get returns.
+func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ err = db.ok()
+ if err != nil {
+ return
+ }
+
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ return db.get(nil, nil, key, se.seq, ro)
+}
+
+// Has returns true if the DB contains the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+ err = db.ok()
+ if err != nil {
+ return
+ }
+
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ return db.has(nil, nil, key, se.seq, ro)
+}
+
+// NewIterator returns an iterator for the latest snapshot of the
+// underlying DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with modifying its
+// underlying DB. The resultant key/value pairs are guaranteed to be
+// consistent.
+//
+// Slice allows slicing the iterator to contain only keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// WARNING: The content of any slice returned by the iterator (e.g. a slice
+// returned by the Iterator.Key() or Iterator.Value() methods) should not be
+// modified unless noted otherwise.
+//
+// The iterator must be released after use, by calling Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ if err := db.ok(); err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+
+ se := db.acquireSnapshot()
+ defer db.releaseSnapshot(se)
+ // The iterator holds the 'version' lock; 'version' is immutable, so the snapshot
+ // can be released after the iterator is created.
+ return db.newIterator(nil, nil, se.seq, slice, ro)
+}
+
+// GetSnapshot returns the latest snapshot of the underlying DB. A snapshot
+// is a frozen snapshot of the DB state at a particular point in time. The
+// content of a snapshot is guaranteed to be consistent.
+//
+// The snapshot must be released after use, by calling Release method.
+func (db *DB) GetSnapshot() (*Snapshot, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ return db.newSnapshot(), nil
+}
+
+// GetProperty returns the value of the given property name.
+//
+// Property names:
+// leveldb.num-files-at-level{n}
+// Returns the number of files at level 'n'.
+// leveldb.stats
+// Returns statistics of the underlying DB.
+// leveldb.iostats
+// Returns statistics of effective disk read and write.
+// leveldb.writedelay
+// Returns cumulative write delay caused by compaction.
+// leveldb.sstables
+// Returns sstables list for each level.
+// leveldb.blockpool
+// Returns block pool stats.
+// leveldb.cachedblock
+// Returns size of cached block.
+// leveldb.openedtables
+// Returns number of opened tables.
+// leveldb.alivesnaps
+// Returns number of alive snapshots.
+// leveldb.aliveiters
+// Returns number of alive iterators.
+func (db *DB) GetProperty(name string) (value string, err error) {
+ err = db.ok()
+ if err != nil {
+ return
+ }
+
+ const prefix = "leveldb."
+ if !strings.HasPrefix(name, prefix) {
+ return "", ErrNotFound
+ }
+ p := name[len(prefix):]
+
+ v := db.s.version()
+ defer v.release()
+
+ numFilesPrefix := "num-files-at-level"
+ switch {
+ case strings.HasPrefix(p, numFilesPrefix):
+ var level uint
+ var rest string
+ n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
+ if n != 1 {
+ err = ErrNotFound
+ } else {
+ value = fmt.Sprint(v.tLen(int(level)))
+ }
+ case p == "stats":
+ value = "Compactions\n" +
+ " Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
+ "-------+------------+---------------+---------------+---------------+---------------\n"
+ for level, tables := range v.levels {
+ duration, read, write := db.compStats.getStat(level)
+ if len(tables) == 0 && duration == 0 {
+ continue
+ }
+ value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
+ level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
+ float64(read)/1048576.0, float64(write)/1048576.0)
+ }
+ case p == "iostats":
+ value = fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f",
+ float64(db.s.stor.reads())/1048576.0,
+ float64(db.s.stor.writes())/1048576.0)
+ case p == "writedelay":
+ writeDelayN, writeDelay := atomic.LoadInt32(&db.cWriteDelayN), time.Duration(atomic.LoadInt64(&db.cWriteDelay))
+ paused := atomic.LoadInt32(&db.inWritePaused) == 1
+ value = fmt.Sprintf("DelayN:%d Delay:%s Paused:%t", writeDelayN, writeDelay, paused)
+ case p == "sstables":
+ for level, tables := range v.levels {
+ value += fmt.Sprintf("--- level %d ---\n", level)
+ for _, t := range tables {
+ value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
+ }
+ }
+ case p == "blockpool":
+ value = fmt.Sprintf("%v", db.s.tops.bpool)
+ case p == "cachedblock":
+ if db.s.tops.bcache != nil {
+ value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
+ } else {
+ value = ""
+ }
+ case p == "openedtables":
+ value = fmt.Sprintf("%d", db.s.tops.cache.Size())
+ case p == "alivesnaps":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
+ case p == "aliveiters":
+ value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
+ default:
+ err = ErrNotFound
+ }
+
+ return
+}
+
+// DBStats is database statistics.
+type DBStats struct {
+ WriteDelayCount int32
+ WriteDelayDuration time.Duration
+ WritePaused bool
+
+ AliveSnapshots int32
+ AliveIterators int32
+
+ IOWrite uint64
+ IORead uint64
+
+ BlockCacheSize int
+ OpenedTablesCount int
+
+ LevelSizes []int64
+ LevelTablesCounts []int
+ LevelRead []int64
+ LevelWrite []int64
+ LevelDurations []time.Duration
+}
+
+// Stats populates s with database statistics.
+func (db *DB) Stats(s *DBStats) error {
+ err := db.ok()
+ if err != nil {
+ return err
+ }
+
+ s.IORead = db.s.stor.reads()
+ s.IOWrite = db.s.stor.writes()
+ s.WriteDelayCount = atomic.LoadInt32(&db.cWriteDelayN)
+ s.WriteDelayDuration = time.Duration(atomic.LoadInt64(&db.cWriteDelay))
+ s.WritePaused = atomic.LoadInt32(&db.inWritePaused) == 1
+
+ s.OpenedTablesCount = db.s.tops.cache.Size()
+ if db.s.tops.bcache != nil {
+ s.BlockCacheSize = db.s.tops.bcache.Size()
+ } else {
+ s.BlockCacheSize = 0
+ }
+
+ s.AliveIterators = atomic.LoadInt32(&db.aliveIters)
+ s.AliveSnapshots = atomic.LoadInt32(&db.aliveSnaps)
+
+ s.LevelDurations = s.LevelDurations[:0]
+ s.LevelRead = s.LevelRead[:0]
+ s.LevelWrite = s.LevelWrite[:0]
+ s.LevelSizes = s.LevelSizes[:0]
+ s.LevelTablesCounts = s.LevelTablesCounts[:0]
+
+ v := db.s.version()
+ defer v.release()
+
+ for level, tables := range v.levels {
+ duration, read, write := db.compStats.getStat(level)
+ if len(tables) == 0 && duration == 0 {
+ continue
+ }
+ s.LevelDurations = append(s.LevelDurations, duration)
+ s.LevelRead = append(s.LevelRead, read)
+ s.LevelWrite = append(s.LevelWrite, write)
+ s.LevelSizes = append(s.LevelSizes, tables.size())
+ s.LevelTablesCounts = append(s.LevelTablesCounts, len(tables))
+ }
+
+ return nil
+}
+
+// SizeOf calculates approximate sizes of the given key ranges.
+// The length of the returned sizes is equal to the length of the given
+// ranges. The returned sizes measure storage space usage, so if the user
+// data compresses by a factor of ten, the returned sizes will be one-tenth
+// the size of the corresponding user data size.
+// The results may not include the sizes of recently written data.
+func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ v := db.s.version()
+ defer v.release()
+
+ sizes := make(Sizes, 0, len(ranges))
+ for _, r := range ranges {
+ imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
+ imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
+ start, err := v.offsetOf(imin)
+ if err != nil {
+ return nil, err
+ }
+ limit, err := v.offsetOf(imax)
+ if err != nil {
+ return nil, err
+ }
+ var size int64
+ if limit >= start {
+ size = limit - start
+ }
+ sizes = append(sizes, size)
+ }
+
+ return sizes, nil
+}
+
+// Close closes the DB. This will also release any outstanding snapshots,
+// abort any in-flight compaction and discard any open transaction.
+//
+// It is not safe to close a DB until all outstanding iterators are released.
+// It is valid to call Close multiple times. Other methods should not be
+// called after the DB has been closed.
+func (db *DB) Close() error {
+ if !db.setClosed() {
+ return ErrClosed
+ }
+
+ start := time.Now()
+ db.log("db@close closing")
+
+ // Clear the finalizer.
+ runtime.SetFinalizer(db, nil)
+
+ // Get compaction error.
+ var err error
+ select {
+ case err = <-db.compErrC:
+ if err == ErrReadOnly {
+ err = nil
+ }
+ default:
+ }
+
+ // Signal all goroutines.
+ close(db.closeC)
+
+ // Discard open transaction.
+ if db.tr != nil {
+ db.tr.Discard()
+ }
+
+ // Acquire writer lock.
+ db.writeLockC <- struct{}{}
+
+ // Wait for all goroutines to exit.
+ db.closeW.Wait()
+
+ // Closes journal.
+ if db.journal != nil {
+ db.journal.Close()
+ db.journalWriter.Close()
+ db.journal = nil
+ db.journalWriter = nil
+ }
+
+ if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
+ }
+
+ // Close session.
+ db.s.close()
+ db.logf("db@close done T·%v", time.Since(start))
+ db.s.release()
+
+ if db.closer != nil {
+ if err1 := db.closer.Close(); err == nil {
+ err = err1
+ }
+ db.closer = nil
+ }
+
+ // Clear memdbs.
+ db.clearMems()
+
+ return err
+}
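The file above wires up the vendored package's public entry points (OpenFile, Get, Has, GetProperty, Close). A minimal, hedged usage sketch of these entry points follows; the path, key, and property name are placeholders and the snippet is illustrative only:

package main

import (
	"fmt"
	"log"

	"github.com/syndtr/goleveldb/leveldb"
)

func main() {
	// OpenFile creates the database if it does not exist (placeholder path).
	db, err := leveldb.OpenFile("/tmp/example-db", nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Get returns ErrNotFound when the key is absent.
	if _, err := db.Get([]byte("missing-key"), nil); err == leveldb.ErrNotFound {
		fmt.Println("key not found")
	}

	// GetProperty exposes internal statistics such as "leveldb.stats".
	if stats, err := db.GetProperty("leveldb.stats"); err == nil {
		fmt.Println(stats)
	}
}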
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go
new file mode 100644
index 00000000..0c1b9a53
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go
@@ -0,0 +1,854 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+var (
+ errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting")
+)
+
+type cStat struct {
+ duration time.Duration
+ read int64
+ write int64
+}
+
+func (p *cStat) add(n *cStatStaging) {
+ p.duration += n.duration
+ p.read += n.read
+ p.write += n.write
+}
+
+func (p *cStat) get() (duration time.Duration, read, write int64) {
+ return p.duration, p.read, p.write
+}
+
+type cStatStaging struct {
+ start time.Time
+ duration time.Duration
+ on bool
+ read int64
+ write int64
+}
+
+func (p *cStatStaging) startTimer() {
+ if !p.on {
+ p.start = time.Now()
+ p.on = true
+ }
+}
+
+func (p *cStatStaging) stopTimer() {
+ if p.on {
+ p.duration += time.Since(p.start)
+ p.on = false
+ }
+}
+
+type cStats struct {
+ lk sync.Mutex
+ stats []cStat
+}
+
+func (p *cStats) addStat(level int, n *cStatStaging) {
+ p.lk.Lock()
+ if level >= len(p.stats) {
+ newStats := make([]cStat, level+1)
+ copy(newStats, p.stats)
+ p.stats = newStats
+ }
+ p.stats[level].add(n)
+ p.lk.Unlock()
+}
+
+func (p *cStats) getStat(level int) (duration time.Duration, read, write int64) {
+ p.lk.Lock()
+ defer p.lk.Unlock()
+ if level < len(p.stats) {
+ return p.stats[level].get()
+ }
+ return
+}
+
+func (db *DB) compactionError() {
+ var err error
+noerr:
+ // No error.
+ for {
+ select {
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
+ case err == ErrReadOnly, errors.IsCorrupted(err):
+ goto hasperr
+ default:
+ goto haserr
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+haserr:
+ // Transient error.
+ for {
+ select {
+ case db.compErrC <- err:
+ case err = <-db.compErrSetC:
+ switch {
+ case err == nil:
+ goto noerr
+ case err == ErrReadOnly, errors.IsCorrupted(err):
+ goto hasperr
+ default:
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+hasperr:
+ // Persistent error.
+ for {
+ select {
+ case db.compErrC <- err:
+ case db.compPerErrC <- err:
+ case db.writeLockC <- struct{}{}:
+ // Hold write lock, so that write won't pass-through.
+ db.compWriteLocking = true
+ case <-db.closeC:
+ if db.compWriteLocking {
+ // We should release the lock or Close will hang.
+ <-db.writeLockC
+ }
+ return
+ }
+ }
+}
+
+type compactionTransactCounter int
+
+func (cnt *compactionTransactCounter) incr() {
+ *cnt++
+}
+
+type compactionTransactInterface interface {
+ run(cnt *compactionTransactCounter) error
+ revert() error
+}
+
+func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
+ defer func() {
+ if x := recover(); x != nil {
+ if x == errCompactionTransactExiting {
+ if err := t.revert(); err != nil {
+ db.logf("%s revert error %q", name, err)
+ }
+ }
+ panic(x)
+ }
+ }()
+
+ const (
+ backoffMin = 1 * time.Second
+ backoffMax = 8 * time.Second
+ backoffMul = 2 * time.Second
+ )
+ var (
+ backoff = backoffMin
+ backoffT = time.NewTimer(backoff)
+ lastCnt = compactionTransactCounter(0)
+
+ disableBackoff = db.s.o.GetDisableCompactionBackoff()
+ )
+ for n := 0; ; n++ {
+ // Check whether the DB is closed.
+ if db.isClosed() {
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
+ } else if n > 0 {
+ db.logf("%s retrying N·%d", name, n)
+ }
+
+ // Execute.
+ cnt := compactionTransactCounter(0)
+ err := t.run(&cnt)
+ if err != nil {
+ db.logf("%s error I·%d %q", name, cnt, err)
+ }
+
+ // Set compaction error status.
+ select {
+ case db.compErrSetC <- err:
+ case perr := <-db.compPerErrC:
+ if err != nil {
+ db.logf("%s exiting (persistent error %q)", name, perr)
+ db.compactionExitTransact()
+ }
+ case <-db.closeC:
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
+ }
+ if err == nil {
+ return
+ }
+ if errors.IsCorrupted(err) {
+ db.logf("%s exiting (corruption detected)", name)
+ db.compactionExitTransact()
+ }
+
+ if !disableBackoff {
+ // Reset backoff duration if counter is advancing.
+ if cnt > lastCnt {
+ backoff = backoffMin
+ lastCnt = cnt
+ }
+
+ // Backoff.
+ backoffT.Reset(backoff)
+ if backoff < backoffMax {
+ backoff *= backoffMul
+ if backoff > backoffMax {
+ backoff = backoffMax
+ }
+ }
+ select {
+ case <-backoffT.C:
+ case <-db.closeC:
+ db.logf("%s exiting", name)
+ db.compactionExitTransact()
+ }
+ }
+ }
+}
+
+type compactionTransactFunc struct {
+ runFunc func(cnt *compactionTransactCounter) error
+ revertFunc func() error
+}
+
+func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error {
+ return t.runFunc(cnt)
+}
+
+func (t *compactionTransactFunc) revert() error {
+ if t.revertFunc != nil {
+ return t.revertFunc()
+ }
+ return nil
+}
+
+func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) {
+ db.compactionTransact(name, &compactionTransactFunc{run, revert})
+}
+
+func (db *DB) compactionExitTransact() {
+ panic(errCompactionTransactExiting)
+}
+
+func (db *DB) compactionCommit(name string, rec *sessionRecord) {
+ db.compCommitLk.Lock()
+ defer db.compCommitLk.Unlock() // Defer is necessary.
+ db.compactionTransactFunc(name+"@commit", func(cnt *compactionTransactCounter) error {
+ return db.s.commit(rec)
+ }, nil)
+}
+
+func (db *DB) memCompaction() {
+ mdb := db.getFrozenMem()
+ if mdb == nil {
+ return
+ }
+ defer mdb.decref()
+
+ db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size()))
+
+ // Don't compact empty memdb.
+ if mdb.Len() == 0 {
+ db.logf("memdb@flush skipping")
+ // drop frozen memdb
+ db.dropFrozenMem()
+ return
+ }
+
+ // Pause table compaction.
+ resumeC := make(chan struct{})
+ select {
+ case db.tcompPauseC <- (chan<- struct{})(resumeC):
+ case <-db.compPerErrC:
+ close(resumeC)
+ resumeC = nil
+ case <-db.closeC:
+ db.compactionExitTransact()
+ }
+
+ var (
+ rec = &sessionRecord{}
+ stats = &cStatStaging{}
+ flushLevel int
+ )
+
+ // Generate tables.
+ db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) {
+ stats.startTimer()
+ flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel)
+ stats.stopTimer()
+ return
+ }, func() error {
+ for _, r := range rec.addedTables {
+ db.logf("memdb@flush revert @%d", r.num)
+ if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil {
+ return err
+ }
+ }
+ return nil
+ })
+
+ rec.setJournalNum(db.journalFd.Num)
+ rec.setSeqNum(db.frozenSeq)
+
+ // Commit.
+ stats.startTimer()
+ db.compactionCommit("memdb", rec)
+ stats.stopTimer()
+
+ db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
+
+ for _, r := range rec.addedTables {
+ stats.write += r.size
+ }
+ db.compStats.addStat(flushLevel, stats)
+
+ // Drop frozen memdb.
+ db.dropFrozenMem()
+
+ // Resume table compaction.
+ if resumeC != nil {
+ select {
+ case <-resumeC:
+ close(resumeC)
+ case <-db.closeC:
+ db.compactionExitTransact()
+ }
+ }
+
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+}
+
+type tableCompactionBuilder struct {
+ db *DB
+ s *session
+ c *compaction
+ rec *sessionRecord
+ stat0, stat1 *cStatStaging
+
+ snapHasLastUkey bool
+ snapLastUkey []byte
+ snapLastSeq uint64
+ snapIter int
+ snapKerrCnt int
+ snapDropCnt int
+
+ kerrCnt int
+ dropCnt int
+
+ minSeq uint64
+ strict bool
+ tableSize int
+
+ tw *tWriter
+}
+
+func (b *tableCompactionBuilder) appendKV(key, value []byte) error {
+ // Create a new table if one hasn't been created yet.
+ if b.tw == nil {
+ // Check for pause event.
+ if b.db != nil {
+ select {
+ case ch := <-b.db.tcompPauseC:
+ b.db.pauseCompaction(ch)
+ case <-b.db.closeC:
+ b.db.compactionExitTransact()
+ default:
+ }
+ }
+
+ // Create new table.
+ var err error
+ b.tw, err = b.s.tops.create()
+ if err != nil {
+ return err
+ }
+ }
+
+ // Write key/value into table.
+ return b.tw.append(key, value)
+}
+
+func (b *tableCompactionBuilder) needFlush() bool {
+ return b.tw.tw.BytesLen() >= b.tableSize
+}
+
+func (b *tableCompactionBuilder) flush() error {
+ t, err := b.tw.finish()
+ if err != nil {
+ return err
+ }
+ b.rec.addTableFile(b.c.sourceLevel+1, t)
+ b.stat1.write += t.size
+ b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.sourceLevel+1, t.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
+ b.tw = nil
+ return nil
+}
+
+func (b *tableCompactionBuilder) cleanup() {
+ if b.tw != nil {
+ b.tw.drop()
+ b.tw = nil
+ }
+}
+
+func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
+ snapResumed := b.snapIter > 0
+ hasLastUkey := b.snapHasLastUkey // The key might have zero length, so this is necessary.
+ lastUkey := append([]byte{}, b.snapLastUkey...)
+ lastSeq := b.snapLastSeq
+ b.kerrCnt = b.snapKerrCnt
+ b.dropCnt = b.snapDropCnt
+ // Restore compaction state.
+ b.c.restore()
+
+ defer b.cleanup()
+
+ b.stat1.startTimer()
+ defer b.stat1.stopTimer()
+
+ iter := b.c.newIterator()
+ defer iter.Release()
+ for i := 0; iter.Next(); i++ {
+ // Incr transact counter.
+ cnt.incr()
+
+ // Skip until last state.
+ if i < b.snapIter {
+ continue
+ }
+
+ resumed := false
+ if snapResumed {
+ resumed = true
+ snapResumed = false
+ }
+
+ ikey := iter.Key()
+ ukey, seq, kt, kerr := parseInternalKey(ikey)
+
+ if kerr == nil {
+ shouldStop := !resumed && b.c.shouldStopBefore(ikey)
+
+ if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 {
+ // First occurrence of this user key.
+
+ // Only rotate tables if ukey doesn't hop across.
+ if b.tw != nil && (shouldStop || b.needFlush()) {
+ if err := b.flush(); err != nil {
+ return err
+ }
+
+ // Creates snapshot of the state.
+ b.c.save()
+ b.snapHasLastUkey = hasLastUkey
+ b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...)
+ b.snapLastSeq = lastSeq
+ b.snapIter = i
+ b.snapKerrCnt = b.kerrCnt
+ b.snapDropCnt = b.dropCnt
+ }
+
+ hasLastUkey = true
+ lastUkey = append(lastUkey[:0], ukey...)
+ lastSeq = keyMaxSeq
+ }
+
+ switch {
+ case lastSeq <= b.minSeq:
+ // Dropped because a newer entry for the same user key exists
+ fallthrough // (A)
+ case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
+ // For this user key:
+ // (1) there is no data in higher levels
+ // (2) data in lower levels will have larger seq numbers
+ // (3) data in layers that are being compacted here and have
+ // smaller seq numbers will be dropped in the next
+ // few iterations of this loop (by rule (A) above).
+ // Therefore this deletion marker is obsolete and can be dropped.
+ lastSeq = seq
+ b.dropCnt++
+ continue
+ default:
+ lastSeq = seq
+ }
+ } else {
+ if b.strict {
+ return kerr
+ }
+
+ // Don't drop corrupted keys.
+ hasLastUkey = false
+ lastUkey = lastUkey[:0]
+ lastSeq = keyMaxSeq
+ b.kerrCnt++
+ }
+
+ if err := b.appendKV(ikey, iter.Value()); err != nil {
+ return err
+ }
+ }
+
+ if err := iter.Error(); err != nil {
+ return err
+ }
+
+ // Finish last table.
+ if b.tw != nil && !b.tw.empty() {
+ return b.flush()
+ }
+ return nil
+}
+
+func (b *tableCompactionBuilder) revert() error {
+ for _, at := range b.rec.addedTables {
+ b.s.logf("table@build revert @%d", at.num)
+ if err := b.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: at.num}); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
+ defer c.release()
+
+ rec := &sessionRecord{}
+ rec.addCompPtr(c.sourceLevel, c.imax)
+
+ if !noTrivial && c.trivial() {
+ t := c.levels[0][0]
+ db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1)
+ rec.delTable(c.sourceLevel, t.fd.Num)
+ rec.addTableFile(c.sourceLevel+1, t)
+ db.compactionCommit("table-move", rec)
+ return
+ }
+
+ var stats [2]cStatStaging
+ for i, tables := range c.levels {
+ for _, t := range tables {
+ stats[i].read += t.size
+ // Insert deleted tables into record
+ rec.delTable(c.sourceLevel+i, t.fd.Num)
+ }
+ }
+ sourceSize := int(stats[0].read + stats[1].read)
+ minSeq := db.minSeq()
+ db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq)
+
+ b := &tableCompactionBuilder{
+ db: db,
+ s: db.s,
+ c: c,
+ rec: rec,
+ stat1: &stats[1],
+ minSeq: minSeq,
+ strict: db.s.o.GetStrict(opt.StrictCompaction),
+ tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1),
+ }
+ db.compactionTransact("table@build", b)
+
+ // Commit.
+ stats[1].startTimer()
+ db.compactionCommit("table", rec)
+ stats[1].stopTimer()
+
+ resultSize := int(stats[1].write)
+ db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration)
+
+ // Save compaction stats
+ for i := range stats {
+ db.compStats.addStat(c.sourceLevel+1, &stats[i])
+ }
+}
+
+func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error {
+ db.logf("table@compaction range L%d %q:%q", level, umin, umax)
+ if level >= 0 {
+ if c := db.s.getCompactionRange(level, umin, umax, true); c != nil {
+ db.tableCompaction(c, true)
+ }
+ } else {
+ // Retry until nothing to compact.
+ for {
+ compacted := false
+
+ // Scan for maximum level with overlapped tables.
+ v := db.s.version()
+ m := 1
+ for i := m; i < len(v.levels); i++ {
+ tables := v.levels[i]
+ if tables.overlaps(db.s.icmp, umin, umax, false) {
+ m = i
+ }
+ }
+ v.release()
+
+ for level := 0; level < m; level++ {
+ if c := db.s.getCompactionRange(level, umin, umax, false); c != nil {
+ db.tableCompaction(c, true)
+ compacted = true
+ }
+ }
+
+ if !compacted {
+ break
+ }
+ }
+ }
+
+ return nil
+}
+
+func (db *DB) tableAutoCompaction() {
+ if c := db.s.pickCompaction(); c != nil {
+ db.tableCompaction(c, false)
+ }
+}
+
+func (db *DB) tableNeedCompaction() bool {
+ v := db.s.version()
+ defer v.release()
+ return v.needCompaction()
+}
+
+// resumeWrite returns an indicator of whether write operations should resume once enough level-0 files have been compacted.
+func (db *DB) resumeWrite() bool {
+ v := db.s.version()
+ defer v.release()
+ if v.tLen(0) < db.s.o.GetWriteL0PauseTrigger() {
+ return true
+ }
+ return false
+}
+
+func (db *DB) pauseCompaction(ch chan<- struct{}) {
+ select {
+ case ch <- struct{}{}:
+ case <-db.closeC:
+ db.compactionExitTransact()
+ }
+}
+
+type cCmd interface {
+ ack(err error)
+}
+
+type cAuto struct {
+ // Note for table compaction: a non-empty ackC means this is a compaction-wait command.
+ ackC chan<- error
+}
+
+func (r cAuto) ack(err error) {
+ if r.ackC != nil {
+ defer func() {
+ recover()
+ }()
+ r.ackC <- err
+ }
+}
+
+type cRange struct {
+ level int
+ min, max []byte
+ ackC chan<- error
+}
+
+func (r cRange) ack(err error) {
+ if r.ackC != nil {
+ defer func() {
+ recover()
+ }()
+ r.ackC <- err
+ }
+}
+
+// This will trigger auto compaction but will not wait for it.
+func (db *DB) compTrigger(compC chan<- cCmd) {
+ select {
+ case compC <- cAuto{}:
+ default:
+ }
+}
+
+// This will trigger auto compaction and/or wait for all compactions to be done.
+func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) {
+ ch := make(chan error)
+ defer close(ch)
+ // Send cmd.
+ select {
+ case compC <- cAuto{ch}:
+ case err = <-db.compErrC:
+ return
+ case <-db.closeC:
+ return ErrClosed
+ }
+ // Wait cmd.
+ select {
+ case err = <-ch:
+ case err = <-db.compErrC:
+ case <-db.closeC:
+ return ErrClosed
+ }
+ return err
+}
+
+// Send range compaction request.
+func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
+ ch := make(chan error)
+ defer close(ch)
+ // Send cmd.
+ select {
+ case compC <- cRange{level, min, max, ch}:
+ case err := <-db.compErrC:
+ return err
+ case <-db.closeC:
+ return ErrClosed
+ }
+ // Wait cmd.
+ select {
+ case err = <-ch:
+ case err = <-db.compErrC:
+ case <-db.closeC:
+ return ErrClosed
+ }
+ return err
+}
+
+func (db *DB) mCompaction() {
+ var x cCmd
+
+ defer func() {
+ if x := recover(); x != nil {
+ if x != errCompactionTransactExiting {
+ panic(x)
+ }
+ }
+ if x != nil {
+ x.ack(ErrClosed)
+ }
+ db.closeW.Done()
+ }()
+
+ for {
+ select {
+ case x = <-db.mcompCmdC:
+ switch x.(type) {
+ case cAuto:
+ db.memCompaction()
+ x.ack(nil)
+ x = nil
+ default:
+ panic("leveldb: unknown command")
+ }
+ case <-db.closeC:
+ return
+ }
+ }
+}
+
+func (db *DB) tCompaction() {
+ var (
+ x cCmd
+ waitQ []cCmd
+ )
+
+ defer func() {
+ if x := recover(); x != nil {
+ if x != errCompactionTransactExiting {
+ panic(x)
+ }
+ }
+ for i := range waitQ {
+ waitQ[i].ack(ErrClosed)
+ waitQ[i] = nil
+ }
+ if x != nil {
+ x.ack(ErrClosed)
+ }
+ db.closeW.Done()
+ }()
+
+ for {
+ if db.tableNeedCompaction() {
+ select {
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
+ continue
+ case <-db.closeC:
+ return
+ default:
+ }
+ // Resume write operation as soon as possible.
+ if len(waitQ) > 0 && db.resumeWrite() {
+ for i := range waitQ {
+ waitQ[i].ack(nil)
+ waitQ[i] = nil
+ }
+ waitQ = waitQ[:0]
+ }
+ } else {
+ for i := range waitQ {
+ waitQ[i].ack(nil)
+ waitQ[i] = nil
+ }
+ waitQ = waitQ[:0]
+ select {
+ case x = <-db.tcompCmdC:
+ case ch := <-db.tcompPauseC:
+ db.pauseCompaction(ch)
+ continue
+ case <-db.closeC:
+ return
+ }
+ }
+ if x != nil {
+ switch cmd := x.(type) {
+ case cAuto:
+ if cmd.ackC != nil {
+ // Check the write pause state before caching it.
+ if db.resumeWrite() {
+ x.ack(nil)
+ } else {
+ waitQ = append(waitQ, x)
+ }
+ }
+ case cRange:
+ x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max))
+ default:
+ panic("leveldb: unknown command")
+ }
+ x = nil
+ }
+ db.tableAutoCompaction()
+ }
+}
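The goroutines above (tCompaction, mCompaction) are internal and are driven through the tcompCmdC/mcompCmdC channels; user code does not touch them directly. A manual compaction is normally requested via the exported CompactRange method, which is defined elsewhere in the package and ultimately feeds compTriggerRange. A hedged sketch under that assumption:

package example

import (
	"fmt"
	"log"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/util"
)

func compactAll(db *leveldb.DB) {
	// A zero util.Range asks for compaction of the entire key space.
	if err := db.CompactRange(util.Range{}); err != nil {
		log.Fatal(err)
	}
	// Per-level compaction statistics are then visible via GetProperty.
	if stats, err := db.GetProperty("leveldb.stats"); err == nil {
		fmt.Println(stats)
	}
}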
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go
new file mode 100644
index 00000000..03c24cda
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go
@@ -0,0 +1,360 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "errors"
+ "math/rand"
+ "runtime"
+ "sync"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var (
+ errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key")
+)
+
+type memdbReleaser struct {
+ once sync.Once
+ m *memDB
+}
+
+func (mr *memdbReleaser) Release() {
+ mr.once.Do(func() {
+ mr.m.decref()
+ })
+}
+
+func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
+ em, fm := db.getMems()
+ v := db.s.version()
+
+ tableIts := v.getIterators(slice, ro)
+ n := len(tableIts) + len(auxt) + 3
+ its := make([]iterator.Iterator, 0, n)
+
+ if auxm != nil {
+ ami := auxm.NewIterator(slice)
+ ami.SetReleaser(&memdbReleaser{m: auxm})
+ its = append(its, ami)
+ }
+ for _, t := range auxt {
+ its = append(its, v.s.tops.newIterator(t, slice, ro))
+ }
+
+ emi := em.NewIterator(slice)
+ emi.SetReleaser(&memdbReleaser{m: em})
+ its = append(its, emi)
+ if fm != nil {
+ fmi := fm.NewIterator(slice)
+ fmi.SetReleaser(&memdbReleaser{m: fm})
+ its = append(its, fmi)
+ }
+ its = append(its, tableIts...)
+ mi := iterator.NewMergedIterator(its, db.s.icmp, strict)
+ mi.SetReleaser(&versionReleaser{v: v})
+ return mi
+}
+
+func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter {
+ var islice *util.Range
+ if slice != nil {
+ islice = &util.Range{}
+ if slice.Start != nil {
+ islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek)
+ }
+ if slice.Limit != nil {
+ islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek)
+ }
+ }
+ rawIter := db.newRawIterator(auxm, auxt, islice, ro)
+ iter := &dbIter{
+ db: db,
+ icmp: db.s.icmp,
+ iter: rawIter,
+ seq: seq,
+ strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader),
+ key: make([]byte, 0),
+ value: make([]byte, 0),
+ }
+ atomic.AddInt32(&db.aliveIters, 1)
+ runtime.SetFinalizer(iter, (*dbIter).Release)
+ return iter
+}
+
+func (db *DB) iterSamplingRate() int {
+ return rand.Intn(2 * db.s.o.GetIteratorSamplingRate())
+}
+
+type dir int
+
+const (
+ dirReleased dir = iota - 1
+ dirSOI
+ dirEOI
+ dirBackward
+ dirForward
+)
+
+// dbIter represents an iterator state over a database session.
+type dbIter struct {
+ db *DB
+ icmp *iComparer
+ iter iterator.Iterator
+ seq uint64
+ strict bool
+
+ smaplingGap int
+ dir dir
+ key []byte
+ value []byte
+ err error
+ releaser util.Releaser
+}
+
+func (i *dbIter) sampleSeek() {
+ ikey := i.iter.Key()
+ i.smaplingGap -= len(ikey) + len(i.iter.Value())
+ for i.smaplingGap < 0 {
+ i.smaplingGap += i.db.iterSamplingRate()
+ i.db.sampleSeek(ikey)
+ }
+}
+
+func (i *dbIter) setErr(err error) {
+ i.err = err
+ i.key = nil
+ i.value = nil
+}
+
+func (i *dbIter) iterErr() {
+ if err := i.iter.Error(); err != nil {
+ i.setErr(err)
+ }
+}
+
+func (i *dbIter) Valid() bool {
+ return i.err == nil && i.dir > dirEOI
+}
+
+func (i *dbIter) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.iter.First() {
+ i.dir = dirSOI
+ return i.next()
+ }
+ i.dir = dirEOI
+ i.iterErr()
+ return false
+}
+
+func (i *dbIter) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.iter.Last() {
+ return i.prev()
+ }
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+}
+
+func (i *dbIter) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek)
+ if i.iter.Seek(ikey) {
+ i.dir = dirSOI
+ return i.next()
+ }
+ i.dir = dirEOI
+ i.iterErr()
+ return false
+}
+
+func (i *dbIter) next() bool {
+ for {
+ if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if seq <= i.seq {
+ switch kt {
+ case keyTypeDel:
+ // Skip deleted key.
+ i.key = append(i.key[:0], ukey...)
+ i.dir = dirForward
+ case keyTypeVal:
+ if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 {
+ i.key = append(i.key[:0], ukey...)
+ i.value = append(i.value[:0], i.iter.Value()...)
+ i.dir = dirForward
+ return true
+ }
+ }
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ break
+ }
+ if !i.iter.Next() {
+ i.dir = dirEOI
+ i.iterErr()
+ break
+ }
+ }
+ return false
+}
+
+func (i *dbIter) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.iter.Next() || (i.dir == dirBackward && !i.iter.Next()) {
+ i.dir = dirEOI
+ i.iterErr()
+ return false
+ }
+ return i.next()
+}
+
+func (i *dbIter) prev() bool {
+ i.dir = dirBackward
+ del := true
+ if i.iter.Valid() {
+ for {
+ if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if seq <= i.seq {
+ if !del && i.icmp.uCompare(ukey, i.key) < 0 {
+ return true
+ }
+ del = (kt == keyTypeDel)
+ if !del {
+ i.key = append(i.key[:0], ukey...)
+ i.value = append(i.value[:0], i.iter.Value()...)
+ }
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ return false
+ }
+ if !i.iter.Prev() {
+ break
+ }
+ }
+ }
+ if del {
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+ }
+ return true
+}
+
+func (i *dbIter) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirEOI:
+ return i.Last()
+ case dirForward:
+ for i.iter.Prev() {
+ if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
+ i.sampleSeek()
+ if i.icmp.uCompare(ukey, i.key) < 0 {
+ goto cont
+ }
+ } else if i.strict {
+ i.setErr(kerr)
+ return false
+ }
+ }
+ i.dir = dirSOI
+ i.iterErr()
+ return false
+ }
+
+cont:
+ return i.prev()
+}
+
+func (i *dbIter) Key() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.key
+}
+
+func (i *dbIter) Value() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.value
+}
+
+func (i *dbIter) Release() {
+ if i.dir != dirReleased {
+ // Clear the finalizer.
+ runtime.SetFinalizer(i, nil)
+
+ if i.releaser != nil {
+ i.releaser.Release()
+ i.releaser = nil
+ }
+
+ i.dir = dirReleased
+ i.key = nil
+ i.value = nil
+ i.iter.Release()
+ i.iter = nil
+ atomic.AddInt32(&i.db.aliveIters, -1)
+ i.db = nil
+ }
+}
+
+func (i *dbIter) SetReleaser(releaser util.Releaser) {
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
+}
+
+func (i *dbIter) Error() error {
+ return i.err
+}
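db_iter.go merges memdb and table iterators behind the dbIter type returned by DB.NewIterator. A hedged sketch of iterating over a bounded key range (both bounds are placeholders):

package example

import (
	"log"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/util"
)

func scanRange(db *leveldb.DB) {
	// Restrict the iterator to keys in ["a", "z"); both bounds are placeholders.
	iter := db.NewIterator(&util.Range{Start: []byte("a"), Limit: []byte("z")}, nil)
	defer iter.Release()

	for iter.Next() {
		// Key/Value buffers are reused between iterations; copy them if they
		// must outlive the loop.
		k := append([]byte(nil), iter.Key()...)
		v := append([]byte(nil), iter.Value()...)
		log.Printf("%s = %s", k, v)
	}
	if err := iter.Error(); err != nil {
		log.Fatal(err)
	}
}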
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
new file mode 100644
index 00000000..c2ad70c8
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go
@@ -0,0 +1,187 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "container/list"
+ "fmt"
+ "runtime"
+ "sync"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type snapshotElement struct {
+ seq uint64
+ ref int
+ e *list.Element
+}
+
+// Acquires a snapshot, based on the latest sequence number.
+func (db *DB) acquireSnapshot() *snapshotElement {
+ db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
+ seq := db.getSeq()
+
+ if e := db.snapsList.Back(); e != nil {
+ se := e.Value.(*snapshotElement)
+ if se.seq == seq {
+ se.ref++
+ return se
+ } else if seq < se.seq {
+ panic("leveldb: sequence number is not increasing")
+ }
+ }
+ se := &snapshotElement{seq: seq, ref: 1}
+ se.e = db.snapsList.PushBack(se)
+ return se
+}
+
+// Releases given snapshot element.
+func (db *DB) releaseSnapshot(se *snapshotElement) {
+ db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
+ se.ref--
+ if se.ref == 0 {
+ db.snapsList.Remove(se.e)
+ se.e = nil
+ } else if se.ref < 0 {
+ panic("leveldb: Snapshot: negative element reference")
+ }
+}
+
+// Gets the minimum sequence number that is not being snapshotted.
+func (db *DB) minSeq() uint64 {
+ db.snapsMu.Lock()
+ defer db.snapsMu.Unlock()
+
+ if e := db.snapsList.Front(); e != nil {
+ return e.Value.(*snapshotElement).seq
+ }
+
+ return db.getSeq()
+}
+
+// Snapshot is a DB snapshot.
+type Snapshot struct {
+ db *DB
+ elem *snapshotElement
+ mu sync.RWMutex
+ released bool
+}
+
+// Creates new snapshot object.
+func (db *DB) newSnapshot() *Snapshot {
+ snap := &Snapshot{
+ db: db,
+ elem: db.acquireSnapshot(),
+ }
+ atomic.AddInt32(&db.aliveSnaps, 1)
+ runtime.SetFinalizer(snap, (*Snapshot).Release)
+ return snap
+}
+
+func (snap *Snapshot) String() string {
+ return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq)
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if
+// the DB does not contain the key.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Get returns.
+func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ err = snap.db.ok()
+ if err != nil {
+ return
+ }
+ snap.mu.RLock()
+ defer snap.mu.RUnlock()
+ if snap.released {
+ err = ErrSnapshotReleased
+ return
+ }
+ return snap.db.get(nil, nil, key, snap.elem.seq, ro)
+}
+
+// Has returns true if the DB contains the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
+ err = snap.db.ok()
+ if err != nil {
+ return
+ }
+ snap.mu.RLock()
+ defer snap.mu.RUnlock()
+ if snap.released {
+ err = ErrSnapshotReleased
+ return
+ }
+ return snap.db.has(nil, nil, key, snap.elem.seq, ro)
+}
+
+// NewIterator returns an iterator for the snapshot of the underlying DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with modifying its
+// underlying DB. The resultant key/value pairs are guaranteed to be
+// consistent.
+//
+// Slice allows slicing the iterator to contain only keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// WARNING: The content of any slice returned by the iterator (e.g. a slice
+// returned by the Iterator.Key() or Iterator.Value() methods) should not be
+// modified unless noted otherwise.
+//
+// The iterator must be released after use, by calling Release method.
+// Releasing the snapshot doesn't release the iterator too; the
+// iterator remains valid until it is released.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ if err := snap.db.ok(); err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ snap.mu.Lock()
+ defer snap.mu.Unlock()
+ if snap.released {
+ return iterator.NewEmptyIterator(ErrSnapshotReleased)
+ }
+ // Since the iterator already holds a version ref, it doesn't need to
+ // hold a snapshot ref.
+ return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro)
+}
+
+// Release releases the snapshot. This will not release any returned
+// iterators; the iterators remain valid until released or until the
+// underlying DB is closed.
+//
+// Other methods should not be called after the snapshot has been released.
+func (snap *Snapshot) Release() {
+ snap.mu.Lock()
+ defer snap.mu.Unlock()
+
+ if !snap.released {
+ // Clear the finalizer.
+ runtime.SetFinalizer(snap, nil)
+
+ snap.released = true
+ snap.db.releaseSnapshot(snap.elem)
+ atomic.AddInt32(&snap.db.aliveSnaps, -1)
+ snap.db = nil
+ snap.elem = nil
+ }
+}
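Snapshots acquired through DB.GetSnapshot pin a sequence number, so reads made through the snapshot are unaffected by later writes. A hedged sketch (the key is a placeholder):

package example

import (
	"log"

	"github.com/syndtr/goleveldb/leveldb"
)

func readAtSnapshot(db *leveldb.DB) {
	snap, err := db.GetSnapshot()
	if err != nil {
		log.Fatal(err)
	}
	// Release the snapshot so obsolete versions can be compacted away.
	defer snap.Release()

	// Reads through the snapshot see the DB state at acquisition time.
	if val, err := snap.Get([]byte("some-key"), nil); err == nil {
		log.Printf("value: %s", val)
	} else if err != leveldb.ErrNotFound {
		log.Fatal(err)
	}
}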
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go
new file mode 100644
index 00000000..65e1c54b
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go
@@ -0,0 +1,239 @@
+// Copyright (c) 2013, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "errors"
+ "sync/atomic"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+var (
+ errHasFrozenMem = errors.New("has frozen mem")
+)
+
+type memDB struct {
+ db *DB
+ *memdb.DB
+ ref int32
+}
+
+func (m *memDB) getref() int32 {
+ return atomic.LoadInt32(&m.ref)
+}
+
+func (m *memDB) incref() {
+ atomic.AddInt32(&m.ref, 1)
+}
+
+func (m *memDB) decref() {
+ if ref := atomic.AddInt32(&m.ref, -1); ref == 0 {
+ // Only put back memdb with std capacity.
+ if m.Capacity() == m.db.s.o.GetWriteBuffer() {
+ m.Reset()
+ m.db.mpoolPut(m.DB)
+ }
+ m.db = nil
+ m.DB = nil
+ } else if ref < 0 {
+ panic("negative memdb ref")
+ }
+}
+
+// Get latest sequence number.
+func (db *DB) getSeq() uint64 {
+ return atomic.LoadUint64(&db.seq)
+}
+
+// Atomically adds delta to seq.
+func (db *DB) addSeq(delta uint64) {
+ atomic.AddUint64(&db.seq, delta)
+}
+
+func (db *DB) setSeq(seq uint64) {
+ atomic.StoreUint64(&db.seq, seq)
+}
+
+func (db *DB) sampleSeek(ikey internalKey) {
+ v := db.s.version()
+ if v.sampleSeek(ikey) {
+ // Trigger table compaction.
+ db.compTrigger(db.tcompCmdC)
+ }
+ v.release()
+}
+
+func (db *DB) mpoolPut(mem *memdb.DB) {
+ if !db.isClosed() {
+ select {
+ case db.memPool <- mem:
+ default:
+ }
+ }
+}
+
+func (db *DB) mpoolGet(n int) *memDB {
+ var mdb *memdb.DB
+ select {
+ case mdb = <-db.memPool:
+ default:
+ }
+ if mdb == nil || mdb.Capacity() < n {
+ mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n))
+ }
+ return &memDB{
+ db: db,
+ DB: mdb,
+ }
+}
+
+func (db *DB) mpoolDrain() {
+ ticker := time.NewTicker(30 * time.Second)
+ for {
+ select {
+ case <-ticker.C:
+ select {
+ case <-db.memPool:
+ default:
+ }
+ case <-db.closeC:
+ ticker.Stop()
+ // Make sure the pool is drained.
+ select {
+ case <-db.memPool:
+ case <-time.After(time.Second):
+ }
+ close(db.memPool)
+ return
+ }
+ }
+}
+
+// Create a new memdb and freeze the old one; needs external synchronization.
+// newMem is only called synchronously by the writer.
+func (db *DB) newMem(n int) (mem *memDB, err error) {
+ fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()}
+ w, err := db.s.stor.Create(fd)
+ if err != nil {
+ db.s.reuseFileNum(fd.Num)
+ return
+ }
+
+ db.memMu.Lock()
+ defer db.memMu.Unlock()
+
+ if db.frozenMem != nil {
+ return nil, errHasFrozenMem
+ }
+
+ if db.journal == nil {
+ db.journal = journal.NewWriter(w)
+ } else {
+ db.journal.Reset(w)
+ db.journalWriter.Close()
+ db.frozenJournalFd = db.journalFd
+ }
+ db.journalWriter = w
+ db.journalFd = fd
+ db.frozenMem = db.mem
+ mem = db.mpoolGet(n)
+ mem.incref() // for self
+ mem.incref() // for caller
+ db.mem = mem
+ // The seq is only incremented by the writer, and whoever calls newMem
+ // should hold the write lock, so no additional synchronization is needed here.
+ db.frozenSeq = db.seq
+ return
+}
+
+// Get all memdbs.
+func (db *DB) getMems() (e, f *memDB) {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.mem != nil {
+ db.mem.incref()
+ } else if !db.isClosed() {
+ panic("nil effective mem")
+ }
+ if db.frozenMem != nil {
+ db.frozenMem.incref()
+ }
+ return db.mem, db.frozenMem
+}
+
+// Get effective memdb.
+func (db *DB) getEffectiveMem() *memDB {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.mem != nil {
+ db.mem.incref()
+ } else if !db.isClosed() {
+ panic("nil effective mem")
+ }
+ return db.mem
+}
+
+// Check whether we have a frozen memdb.
+func (db *DB) hasFrozenMem() bool {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ return db.frozenMem != nil
+}
+
+// Get frozen memdb.
+func (db *DB) getFrozenMem() *memDB {
+ db.memMu.RLock()
+ defer db.memMu.RUnlock()
+ if db.frozenMem != nil {
+ db.frozenMem.incref()
+ }
+ return db.frozenMem
+}
+
+// Drop frozen memdb; assume that frozen memdb isn't nil.
+func (db *DB) dropFrozenMem() {
+ db.memMu.Lock()
+ if err := db.s.stor.Remove(db.frozenJournalFd); err != nil {
+ db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err)
+ } else {
+ db.logf("journal@remove removed @%d", db.frozenJournalFd.Num)
+ }
+ db.frozenJournalFd = storage.FileDesc{}
+ db.frozenMem.decref()
+ db.frozenMem = nil
+ db.memMu.Unlock()
+}
+
+// Clear mems ptr; used by DB.Close().
+func (db *DB) clearMems() {
+ db.memMu.Lock()
+ db.mem = nil
+ db.frozenMem = nil
+ db.memMu.Unlock()
+}
+
+// Set closed flag; return true if not already closed.
+func (db *DB) setClosed() bool {
+ return atomic.CompareAndSwapUint32(&db.closed, 0, 1)
+}
+
+// Check whether DB was closed.
+func (db *DB) isClosed() bool {
+ return atomic.LoadUint32(&db.closed) != 0
+}
+
+// Check read ok status.
+func (db *DB) ok() error {
+ if db.isClosed() {
+ return ErrClosed
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go
new file mode 100644
index 00000000..1a000018
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go
@@ -0,0 +1,329 @@
+// Copyright (c) 2016, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "errors"
+ "sync"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var errTransactionDone = errors.New("leveldb: transaction already closed")
+
+// Transaction is the transaction handle.
+type Transaction struct {
+ db *DB
+ lk sync.RWMutex
+ seq uint64
+ mem *memDB
+ tables tFiles
+ ikScratch []byte
+ rec sessionRecord
+ stats cStatStaging
+ closed bool
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The returned slice is its own copy; it is safe to modify the contents
+// of the returned slice.
+// It is safe to modify the contents of the argument after Get returns.
+func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) {
+ tr.lk.RLock()
+ defer tr.lk.RUnlock()
+ if tr.closed {
+ return nil, errTransactionDone
+ }
+ return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro)
+}
+
+// Has returns true if the DB does contain the given key.
+//
+// It is safe to modify the contents of the argument after Has returns.
+func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) {
+ tr.lk.RLock()
+ defer tr.lk.RUnlock()
+ if tr.closed {
+ return false, errTransactionDone
+ }
+ return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro)
+}
+
+// NewIterator returns an iterator for the latest snapshot of the transaction.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with writes to the
+// transaction. The resultant key/value pairs are guaranteed to be consistent.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// WARNING: The content of any slice returned by the iterator (e.g. the slice
+// returned by calling the Iterator.Key() or Iterator.Value() methods) should
+// not be modified unless noted otherwise.
+//
+// The iterator must be released after use, by calling Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ tr.lk.RLock()
+ defer tr.lk.RUnlock()
+ if tr.closed {
+ return iterator.NewEmptyIterator(errTransactionDone)
+ }
+ tr.mem.incref()
+ return tr.db.newIterator(tr.mem, tr.tables, tr.seq, slice, ro)
+}
+
+func (tr *Transaction) flush() error {
+ // Flush memdb.
+ if tr.mem.Len() != 0 {
+ tr.stats.startTimer()
+ iter := tr.mem.NewIterator(nil)
+ t, n, err := tr.db.s.tops.createFrom(iter)
+ iter.Release()
+ tr.stats.stopTimer()
+ if err != nil {
+ return err
+ }
+ if tr.mem.getref() == 1 {
+ tr.mem.Reset()
+ } else {
+ tr.mem.decref()
+ tr.mem = tr.db.mpoolGet(0)
+ tr.mem.incref()
+ }
+ tr.tables = append(tr.tables, t)
+ tr.rec.addTableFile(0, t)
+ tr.stats.write += t.size
+ tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
+ }
+ return nil
+}
+
+func (tr *Transaction) put(kt keyType, key, value []byte) error {
+ tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt)
+ if tr.mem.Free() < len(tr.ikScratch)+len(value) {
+ if err := tr.flush(); err != nil {
+ return err
+ }
+ }
+ if err := tr.mem.Put(tr.ikScratch, value); err != nil {
+ return err
+ }
+ tr.seq++
+ return nil
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map.
+// Please note that the transaction is not compacted until committed, so if you
+// write the same key 10 times, all 10 entries remain in the transaction.
+//
+// It is safe to modify the contents of the arguments after Put returns.
+func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error {
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ return tr.put(keyTypeVal, key, value)
+}
+
+// Delete deletes the value for the given key.
+// Please note that the transaction is not compacted until committed, so if you
+// write the same key 10 times, all 10 entries remain in the transaction.
+//
+// It is safe to modify the contents of the arguments after Delete returns.
+func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error {
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ return tr.put(keyTypeDel, key, nil)
+}
+
+// Write applies the given batch to the transaction. The batch will be applied
+// sequentially.
+// Please note that the transaction is not compacted until committed, so if you
+// write the same key 10 times, all 10 entries remain in the transaction.
+//
+// It is safe to modify the contents of the arguments after Write returns.
+func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error {
+ if b == nil || b.Len() == 0 {
+ return nil
+ }
+
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ return b.replayInternal(func(i int, kt keyType, k, v []byte) error {
+ return tr.put(kt, k, v)
+ })
+}
+
+func (tr *Transaction) setDone() {
+ tr.closed = true
+ tr.db.tr = nil
+ tr.mem.decref()
+ <-tr.db.writeLockC
+}
+
+// Commit commits the transaction. If error is not nil, then the transaction is
+// not committed, it can then either be retried or discarded.
+//
+// Other methods should not be called after transaction has been committed.
+func (tr *Transaction) Commit() error {
+ if err := tr.db.ok(); err != nil {
+ return err
+ }
+
+ tr.lk.Lock()
+ defer tr.lk.Unlock()
+ if tr.closed {
+ return errTransactionDone
+ }
+ if err := tr.flush(); err != nil {
+ // Return the error and let the user decide whether to retry or
+ // discard the transaction.
+ return err
+ }
+ if len(tr.tables) != 0 {
+ // Committing transaction.
+ tr.rec.setSeqNum(tr.seq)
+ tr.db.compCommitLk.Lock()
+ tr.stats.startTimer()
+ var cerr error
+ for retry := 0; retry < 3; retry++ {
+ cerr = tr.db.s.commit(&tr.rec)
+ if cerr != nil {
+ tr.db.logf("transaction@commit error R·%d %q", retry, cerr)
+ select {
+ case <-time.After(time.Second):
+ case <-tr.db.closeC:
+ tr.db.logf("transaction@commit exiting")
+ tr.db.compCommitLk.Unlock()
+ return cerr
+ }
+ } else {
+ // Success. Set db.seq.
+ tr.db.setSeq(tr.seq)
+ break
+ }
+ }
+ tr.stats.stopTimer()
+ if cerr != nil {
+ // Return the error and let the user decide whether to retry or
+ // discard the transaction.
+ return cerr
+ }
+
+ // Update compaction stats. This is safe as long as we hold compCommitLk.
+ tr.db.compStats.addStat(0, &tr.stats)
+
+ // Trigger table auto-compaction.
+ tr.db.compTrigger(tr.db.tcompCmdC)
+ tr.db.compCommitLk.Unlock()
+
+ // Additionally, wait for compaction when a certain threshold is reached.
+ // Ignore the error; Commit returns an error only if the transaction can't be committed.
+ tr.db.waitCompaction()
+ }
+ // Only mark as done if transaction committed successfully.
+ tr.setDone()
+ return nil
+}
+
+func (tr *Transaction) discard() {
+ // Discard transaction.
+ for _, t := range tr.tables {
+ tr.db.logf("transaction@discard @%d", t.fd.Num)
+ if err1 := tr.db.s.stor.Remove(t.fd); err1 == nil {
+ tr.db.s.reuseFileNum(t.fd.Num)
+ }
+ }
+}
+
+// Discard discards the transaction.
+//
+// Other methods should not be called after transaction has been discarded.
+func (tr *Transaction) Discard() {
+ tr.lk.Lock()
+ if !tr.closed {
+ tr.discard()
+ tr.setDone()
+ }
+ tr.lk.Unlock()
+}
+
+func (db *DB) waitCompaction() error {
+ if db.s.tLen(0) >= db.s.o.GetWriteL0PauseTrigger() {
+ return db.compTriggerWait(db.tcompCmdC)
+ }
+ return nil
+}
+
+// OpenTransaction opens an atomic DB transaction. Only one transaction can be
+// opened at a time. Subsequent calls to Write and OpenTransaction will be blocked
+// until the in-flight transaction is committed or discarded.
+// The returned transaction handle is safe for concurrent use.
+//
+// Transactions are expensive and can overwhelm compaction, especially if the
+// transaction size is small. Use with caution.
+//
+// The transaction must be closed once done, either by committing or discarding
+// the transaction.
+// Closing the DB will discard any open transaction.
+func (db *DB) OpenTransaction() (*Transaction, error) {
+ if err := db.ok(); err != nil {
+ return nil, err
+ }
+
+ // The write happens synchronously.
+ select {
+ case db.writeLockC <- struct{}{}:
+ case err := <-db.compPerErrC:
+ return nil, err
+ case <-db.closeC:
+ return nil, ErrClosed
+ }
+
+ if db.tr != nil {
+ panic("leveldb: has open transaction")
+ }
+
+ // Flush current memdb.
+ if db.mem != nil && db.mem.Len() != 0 {
+ if _, err := db.rotateMem(0, true); err != nil {
+ return nil, err
+ }
+ }
+
+ // Wait for compaction when a certain threshold is reached.
+ if err := db.waitCompaction(); err != nil {
+ return nil, err
+ }
+
+ tr := &Transaction{
+ db: db,
+ seq: db.seq,
+ mem: db.mpoolGet(0),
+ }
+ tr.mem.incref()
+ db.tr = tr
+ return tr, nil
+}
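
For reference, a small sketch of the transaction lifecycle documented above (OpenTransaction, Put, Commit/Discard); the key, value, and helper name are illustrative only:

    package example

    import (
        "log"

        "github.com/syndtr/goleveldb/leveldb"
    )

    // commitOne sketches the transaction lifecycle: OpenTransaction blocks other
    // writers, Put buffers into the transaction's own memdb, and Commit (or
    // Discard) must always be called so the write lock is released.
    func commitOne(db *leveldb.DB) {
        tr, err := db.OpenTransaction()
        if err != nil {
            log.Fatal(err)
        }
        if err := tr.Put([]byte("k1"), []byte("v1"), nil); err != nil {
            tr.Discard()
            log.Fatal(err)
        }
        if err := tr.Commit(); err != nil {
            // Commit failed: the transaction is still open and may be retried,
            // but here we simply discard it.
            tr.Discard()
            log.Fatal(err)
        }
    }
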
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go
new file mode 100644
index 00000000..3f065489
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go
@@ -0,0 +1,102 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Reader is the interface that wraps basic Get and NewIterator methods.
+// This interface is implemented by both DB and Snapshot.
+type Reader interface {
+ Get(key []byte, ro *opt.ReadOptions) (value []byte, err error)
+ NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator
+}
+
+// Sizes is a list of sizes.
+type Sizes []int64
+
+// Sum returns sum of the sizes.
+func (sizes Sizes) Sum() int64 {
+ var sum int64
+ for _, size := range sizes {
+ sum += size
+ }
+ return sum
+}
+
+// Logging.
+func (db *DB) log(v ...interface{}) { db.s.log(v...) }
+func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) }
+
+// Check and clean files.
+func (db *DB) checkAndCleanFiles() error {
+ v := db.s.version()
+ defer v.release()
+
+ tmap := make(map[int64]bool)
+ for _, tables := range v.levels {
+ for _, t := range tables {
+ tmap[t.fd.Num] = false
+ }
+ }
+
+ fds, err := db.s.stor.List(storage.TypeAll)
+ if err != nil {
+ return err
+ }
+
+ var nt int
+ var rem []storage.FileDesc
+ for _, fd := range fds {
+ keep := true
+ switch fd.Type {
+ case storage.TypeManifest:
+ keep = fd.Num >= db.s.manifestFd.Num
+ case storage.TypeJournal:
+ if !db.frozenJournalFd.Zero() {
+ keep = fd.Num >= db.frozenJournalFd.Num
+ } else {
+ keep = fd.Num >= db.journalFd.Num
+ }
+ case storage.TypeTable:
+ _, keep = tmap[fd.Num]
+ if keep {
+ tmap[fd.Num] = true
+ nt++
+ }
+ }
+
+ if !keep {
+ rem = append(rem, fd)
+ }
+ }
+
+ if nt != len(tmap) {
+ var mfds []storage.FileDesc
+ for num, present := range tmap {
+ if !present {
+ mfds = append(mfds, storage.FileDesc{Type: storage.TypeTable, Num: num})
+ db.logf("db@janitor table missing @%d", num)
+ }
+ }
+ return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds})
+ }
+
+ db.logf("db@janitor F·%d G·%d", len(fds), len(rem))
+ for _, fd := range rem {
+ db.logf("db@janitor removing %s-%d", fd.Type, fd.Num)
+ if err := db.s.stor.Remove(fd); err != nil {
+ return err
+ }
+ }
+ return nil
+}
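
The Reader interface above is what lets the same read-side code accept either a *leveldb.DB or a *leveldb.Snapshot. A hedged sketch, with a made-up helper name and util.BytesPrefix from the vendored util package:

    package example

    import (
        "github.com/syndtr/goleveldb/leveldb"
        "github.com/syndtr/goleveldb/leveldb/util"
    )

    // countPrefix works for both *leveldb.DB and *leveldb.Snapshot because
    // both satisfy leveldb.Reader.
    func countPrefix(r leveldb.Reader, prefix []byte) int {
        iter := r.NewIterator(util.BytesPrefix(prefix), nil)
        defer iter.Release()
        n := 0
        for iter.Next() {
            n++
        }
        return n
    }
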
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go
new file mode 100644
index 00000000..db0c1bec
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go
@@ -0,0 +1,464 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync/atomic"
+ "time"
+
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+func (db *DB) writeJournal(batches []*Batch, seq uint64, sync bool) error {
+ wr, err := db.journal.Next()
+ if err != nil {
+ return err
+ }
+ if err := writeBatchesWithHeader(wr, batches, seq); err != nil {
+ return err
+ }
+ if err := db.journal.Flush(); err != nil {
+ return err
+ }
+ if sync {
+ return db.journalWriter.Sync()
+ }
+ return nil
+}
+
+func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) {
+ retryLimit := 3
+retry:
+ // Wait for pending memdb compaction.
+ err = db.compTriggerWait(db.mcompCmdC)
+ if err != nil {
+ return
+ }
+ retryLimit--
+
+ // Create new memdb and journal.
+ mem, err = db.newMem(n)
+ if err != nil {
+ if err == errHasFrozenMem {
+ if retryLimit <= 0 {
+ panic("BUG: still has frozen memdb")
+ }
+ goto retry
+ }
+ return
+ }
+
+ // Schedule memdb compaction.
+ if wait {
+ err = db.compTriggerWait(db.mcompCmdC)
+ } else {
+ db.compTrigger(db.mcompCmdC)
+ }
+ return
+}
+
+func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
+ delayed := false
+ slowdownTrigger := db.s.o.GetWriteL0SlowdownTrigger()
+ pauseTrigger := db.s.o.GetWriteL0PauseTrigger()
+ flush := func() (retry bool) {
+ mdb = db.getEffectiveMem()
+ if mdb == nil {
+ err = ErrClosed
+ return false
+ }
+ defer func() {
+ if retry {
+ mdb.decref()
+ mdb = nil
+ }
+ }()
+ tLen := db.s.tLen(0)
+ mdbFree = mdb.Free()
+ switch {
+ case tLen >= slowdownTrigger && !delayed:
+ delayed = true
+ time.Sleep(time.Millisecond)
+ case mdbFree >= n:
+ return false
+ case tLen >= pauseTrigger:
+ delayed = true
+ // Set the write paused flag explicitly.
+ atomic.StoreInt32(&db.inWritePaused, 1)
+ err = db.compTriggerWait(db.tcompCmdC)
+ // Unset the write paused flag.
+ atomic.StoreInt32(&db.inWritePaused, 0)
+ if err != nil {
+ return false
+ }
+ default:
+ // Allow memdb to grow if it has no entry.
+ if mdb.Len() == 0 {
+ mdbFree = n
+ } else {
+ mdb.decref()
+ mdb, err = db.rotateMem(n, false)
+ if err == nil {
+ mdbFree = mdb.Free()
+ } else {
+ mdbFree = 0
+ }
+ }
+ return false
+ }
+ return true
+ }
+ start := time.Now()
+ for flush() {
+ }
+ if delayed {
+ db.writeDelay += time.Since(start)
+ db.writeDelayN++
+ } else if db.writeDelayN > 0 {
+ db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
+ atomic.AddInt32(&db.cWriteDelayN, int32(db.writeDelayN))
+ atomic.AddInt64(&db.cWriteDelay, int64(db.writeDelay))
+ db.writeDelay = 0
+ db.writeDelayN = 0
+ }
+ return
+}
+
+type writeMerge struct {
+ sync bool
+ batch *Batch
+ keyType keyType
+ key, value []byte
+}
+
+func (db *DB) unlockWrite(overflow bool, merged int, err error) {
+ for i := 0; i < merged; i++ {
+ db.writeAckC <- err
+ }
+ if overflow {
+ // Pass lock to the next write (that failed to merge).
+ db.writeMergedC <- false
+ } else {
+ // Release lock.
+ <-db.writeLockC
+ }
+}
+
+// ourBatch is a batch that we can modify.
+func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error {
+ // Try to flush the memdb. This method will also try to throttle writes
+ // if they are too fast and compaction cannot catch up.
+ mdb, mdbFree, err := db.flush(batch.internalLen)
+ if err != nil {
+ db.unlockWrite(false, 0, err)
+ return err
+ }
+ defer mdb.decref()
+
+ var (
+ overflow bool
+ merged int
+ batches = []*Batch{batch}
+ )
+
+ if merge {
+ // Merge limit.
+ var mergeLimit int
+ if batch.internalLen > 128<<10 {
+ mergeLimit = (1 << 20) - batch.internalLen
+ } else {
+ mergeLimit = 128 << 10
+ }
+ mergeCap := mdbFree - batch.internalLen
+ if mergeLimit > mergeCap {
+ mergeLimit = mergeCap
+ }
+
+ merge:
+ for mergeLimit > 0 {
+ select {
+ case incoming := <-db.writeMergeC:
+ if incoming.batch != nil {
+ // Merge batch.
+ if incoming.batch.internalLen > mergeLimit {
+ overflow = true
+ break merge
+ }
+ batches = append(batches, incoming.batch)
+ mergeLimit -= incoming.batch.internalLen
+ } else {
+ // Merge put.
+ internalLen := len(incoming.key) + len(incoming.value) + 8
+ if internalLen > mergeLimit {
+ overflow = true
+ break merge
+ }
+ if ourBatch == nil {
+ ourBatch = db.batchPool.Get().(*Batch)
+ ourBatch.Reset()
+ batches = append(batches, ourBatch)
+ }
+ // We can use the same batch since concurrent writes don't
+ // guarantee write order.
+ ourBatch.appendRec(incoming.keyType, incoming.key, incoming.value)
+ mergeLimit -= internalLen
+ }
+ sync = sync || incoming.sync
+ merged++
+ db.writeMergedC <- true
+
+ default:
+ break merge
+ }
+ }
+ }
+
+ // Release ourBatch if any.
+ if ourBatch != nil {
+ defer db.batchPool.Put(ourBatch)
+ }
+
+ // Seq number.
+ seq := db.seq + 1
+
+ // Write journal.
+ if err := db.writeJournal(batches, seq, sync); err != nil {
+ db.unlockWrite(overflow, merged, err)
+ return err
+ }
+
+ // Put batches.
+ for _, batch := range batches {
+ if err := batch.putMem(seq, mdb.DB); err != nil {
+ panic(err)
+ }
+ seq += uint64(batch.Len())
+ }
+
+ // Incr seq number.
+ db.addSeq(uint64(batchesLen(batches)))
+
+ // Rotate memdb if it has reached the threshold.
+ if batch.internalLen >= mdbFree {
+ db.rotateMem(0, false)
+ }
+
+ db.unlockWrite(overflow, merged, nil)
+ return nil
+}
+
+// Write applies the given batch to the DB. The batch records will be applied
+// sequentially. Write may be used concurrently; when used concurrently and the
+// batch is small enough, Write will try to merge the batches. Set the NoWriteMerge
+// option to true to disable write merge.
+//
+// It is safe to modify the contents of the arguments after Write returns but
+// not before. Write will not modify content of the batch.
+func (db *DB) Write(batch *Batch, wo *opt.WriteOptions) error {
+ if err := db.ok(); err != nil || batch == nil || batch.Len() == 0 {
+ return err
+ }
+
+ // If the batch size is larger than the write buffer, it may be justified to
+ // write using a transaction instead. With a transaction the batch will be
+ // written into tables directly, skipping the journaling.
+ if batch.internalLen > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() {
+ tr, err := db.OpenTransaction()
+ if err != nil {
+ return err
+ }
+ if err := tr.Write(batch, wo); err != nil {
+ tr.Discard()
+ return err
+ }
+ return tr.Commit()
+ }
+
+ merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
+ sync := wo.GetSync() && !db.s.o.GetNoSync()
+
+ // Acquire write lock.
+ if merge {
+ select {
+ case db.writeMergeC <- writeMerge{sync: sync, batch: batch}:
+ if <-db.writeMergedC {
+ // Write is merged.
+ return <-db.writeAckC
+ }
+ // Write is not merged, the write lock is handed to us. Continue.
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ } else {
+ select {
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ }
+
+ return db.writeLocked(batch, nil, merge, sync)
+}
+
+func (db *DB) putRec(kt keyType, key, value []byte, wo *opt.WriteOptions) error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
+ sync := wo.GetSync() && !db.s.o.GetNoSync()
+
+ // Acquire write lock.
+ if merge {
+ select {
+ case db.writeMergeC <- writeMerge{sync: sync, keyType: kt, key: key, value: value}:
+ if <-db.writeMergedC {
+ // Write is merged.
+ return <-db.writeAckC
+ }
+ // Write is not merged, the write lock is handed to us. Continue.
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ } else {
+ select {
+ case db.writeLockC <- struct{}{}:
+ // Write lock acquired.
+ case err := <-db.compPerErrC:
+ // Compaction error.
+ return err
+ case <-db.closeC:
+ // Closed
+ return ErrClosed
+ }
+ }
+
+ batch := db.batchPool.Get().(*Batch)
+ batch.Reset()
+ batch.appendRec(kt, key, value)
+ return db.writeLocked(batch, batch, merge, sync)
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map. Write merge also applies for Put, see
+// Write.
+//
+// It is safe to modify the contents of the arguments after Put returns but not
+// before.
+func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
+ return db.putRec(keyTypeVal, key, value, wo)
+}
+
+// Delete deletes the value for the given key. Delete will not return an error
+// if the key doesn't exist. Write merge also applies for Delete, see Write.
+//
+// It is safe to modify the contents of the arguments after Delete returns but
+// not before.
+func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
+ return db.putRec(keyTypeDel, key, nil, wo)
+}
+
+func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool {
+ iter := mem.NewIterator(nil)
+ defer iter.Release()
+ return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) &&
+ (min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0))
+}
+
+// CompactRange compacts the underlying DB for the given key range.
+// In particular, deleted and overwritten versions are discarded,
+// and the data is rearranged to reduce the cost of operations
+// needed to access the data. This operation should typically only
+// be invoked by users who understand the underlying implementation.
+//
+// A nil Range.Start is treated as a key before all keys in the DB.
+// And a nil Range.Limit is treated as a key after all keys in the DB.
+// Therefore if both are nil then it will compact the entire DB.
+func (db *DB) CompactRange(r util.Range) error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ // Lock writer.
+ select {
+ case db.writeLockC <- struct{}{}:
+ case err := <-db.compPerErrC:
+ return err
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ // Check for overlaps in memdb.
+ mdb := db.getEffectiveMem()
+ if mdb == nil {
+ return ErrClosed
+ }
+ defer mdb.decref()
+ if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) {
+ // Memdb compaction.
+ if _, err := db.rotateMem(0, false); err != nil {
+ <-db.writeLockC
+ return err
+ }
+ <-db.writeLockC
+ if err := db.compTriggerWait(db.mcompCmdC); err != nil {
+ return err
+ }
+ } else {
+ <-db.writeLockC
+ }
+
+ // Table compaction.
+ return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit)
+}
+
+// SetReadOnly makes DB read-only. It will stay read-only until reopened.
+func (db *DB) SetReadOnly() error {
+ if err := db.ok(); err != nil {
+ return err
+ }
+
+ // Lock writer.
+ select {
+ case db.writeLockC <- struct{}{}:
+ db.compWriteLocking = true
+ case err := <-db.compPerErrC:
+ return err
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ // Set compaction read-only.
+ select {
+ case db.compErrSetC <- ErrReadOnly:
+ case perr := <-db.compPerErrC:
+ return perr
+ case <-db.closeC:
+ return ErrClosed
+ }
+
+ return nil
+}
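
To tie the write path above together, a short illustrative sketch of a batched write with a synced journal followed by a whole-DB CompactRange; the helper name and keys are invented for the example:

    package example

    import (
        "log"

        "github.com/syndtr/goleveldb/leveldb"
        "github.com/syndtr/goleveldb/leveldb/opt"
        "github.com/syndtr/goleveldb/leveldb/util"
    )

    // syncedBatch applies a small batch with a synced journal write, then
    // compacts the whole key space.
    func syncedBatch(db *leveldb.DB) {
        batch := new(leveldb.Batch)
        batch.Put([]byte("a"), []byte("1"))
        batch.Delete([]byte("b"))

        // Sync forces the journal to be fsynced before Write returns.
        if err := db.Write(batch, &opt.WriteOptions{Sync: true}); err != nil {
            log.Fatal(err)
        }

        // A zero Range (nil Start and Limit) compacts the entire DB.
        if err := db.CompactRange(util.Range{}); err != nil {
            log.Fatal(err)
        }
    }
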
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/doc.go b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go
new file mode 100644
index 00000000..be768e57
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go
@@ -0,0 +1,92 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package leveldb provides implementation of LevelDB key/value database.
+//
+// Create or open a database:
+//
+// // The returned DB instance is safe for concurrent use, which means that all
+// // of the DB's methods may be called concurrently from multiple goroutines.
+// db, err := leveldb.OpenFile("path/to/db", nil)
+// ...
+// defer db.Close()
+// ...
+//
+// Read or modify the database content:
+//
+// // Remember that the contents of the returned slice should not be modified.
+// data, err := db.Get([]byte("key"), nil)
+// ...
+// err = db.Put([]byte("key"), []byte("value"), nil)
+// ...
+// err = db.Delete([]byte("key"), nil)
+// ...
+//
+// Iterate over database content:
+//
+// iter := db.NewIterator(nil, nil)
+// for iter.Next() {
+// // Remember that the contents of the returned slice should not be modified, and
+// // only valid until the next call to Next.
+// key := iter.Key()
+// value := iter.Value()
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Iterate over subset of database content with a particular prefix:
+//
+// iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
+// for iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Seek-then-Iterate:
+//
+// iter := db.NewIterator(nil, nil)
+// for ok := iter.Seek(key); ok; ok = iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Iterate over subset of database content:
+//
+// iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil)
+// for iter.Next() {
+// // Use key/value.
+// ...
+// }
+// iter.Release()
+// err = iter.Error()
+// ...
+//
+// Batch writes:
+//
+// batch := new(leveldb.Batch)
+// batch.Put([]byte("foo"), []byte("value"))
+// batch.Put([]byte("bar"), []byte("another value"))
+// batch.Delete([]byte("baz"))
+// err = db.Write(batch, nil)
+// ...
+//
+// Use bloom filter:
+//
+// o := &opt.Options{
+// Filter: filter.NewBloomFilter(10),
+// }
+// db, err := leveldb.OpenFile("path/to/db", o)
+// ...
+// defer db.Close()
+// ...
+package leveldb
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go
new file mode 100644
index 00000000..de264981
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go
@@ -0,0 +1,20 @@
+// Copyright (c) 2014, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+)
+
+// Common errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ ErrReadOnly = errors.New("leveldb: read-only mode")
+ ErrSnapshotReleased = errors.New("leveldb: snapshot released")
+ ErrIterReleased = errors.New("leveldb: iterator released")
+ ErrClosed = errors.New("leveldb: closed")
+)
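
The exported sentinel errors above are compared directly by callers. A brief sketch of the usual three-way handling of a read (db.Get itself is defined elsewhere in the package):

    package example

    import (
        "log"

        "github.com/syndtr/goleveldb/leveldb"
    )

    // lookup shows the usual three-way handling of Get: found, not found, failed.
    func lookup(db *leveldb.DB, key []byte) {
        val, err := db.Get(key, nil)
        switch {
        case err == leveldb.ErrNotFound:
            log.Printf("%q is not in the DB", key)
        case err != nil:
            log.Fatal(err) // a real failure, e.g. leveldb.ErrClosed
        default:
            log.Printf("%q = %q", key, val)
        }
    }
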
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go
new file mode 100644
index 00000000..8d6146b6
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go
@@ -0,0 +1,78 @@
+// Copyright (c) 2014, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package errors provides common error types used throughout leveldb.
+package errors
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Common errors.
+var (
+ ErrNotFound = New("leveldb: not found")
+ ErrReleased = util.ErrReleased
+ ErrHasReleaser = util.ErrHasReleaser
+)
+
+// New returns an error that formats as the given text.
+func New(text string) error {
+ return errors.New(text)
+}
+
+// ErrCorrupted is the type that wraps errors that indicate corruption in
+// the database.
+type ErrCorrupted struct {
+ Fd storage.FileDesc
+ Err error
+}
+
+func (e *ErrCorrupted) Error() string {
+ if !e.Fd.Zero() {
+ return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
+ }
+ return e.Err.Error()
+}
+
+// NewErrCorrupted creates new ErrCorrupted error.
+func NewErrCorrupted(fd storage.FileDesc, err error) error {
+ return &ErrCorrupted{fd, err}
+}
+
+// IsCorrupted returns a boolean indicating whether the error indicates
+// a corruption.
+func IsCorrupted(err error) bool {
+ switch err.(type) {
+ case *ErrCorrupted:
+ return true
+ case *storage.ErrCorrupted:
+ return true
+ }
+ return false
+}
+
+// ErrMissingFiles is the type indicating a corruption due to missing
+// files. ErrMissingFiles is always wrapped with ErrCorrupted.
+type ErrMissingFiles struct {
+ Fds []storage.FileDesc
+}
+
+func (e *ErrMissingFiles) Error() string { return "file missing" }
+
+// SetFd sets the 'file info' of the given error with the given file.
+// Currently only ErrCorrupted is supported; otherwise it does nothing.
+func SetFd(err error, fd storage.FileDesc) error {
+ switch x := err.(type) {
+ case *ErrCorrupted:
+ x.Fd = fd
+ return x
+ }
+ return err
+}
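
Callers typically use errors.IsCorrupted to decide whether a failed open can be recovered. A hedged sketch, assuming leveldb.RecoverFile (defined elsewhere in the package) as the recovery path:

    package example

    import (
        "github.com/syndtr/goleveldb/leveldb"
        "github.com/syndtr/goleveldb/leveldb/errors"
        "github.com/syndtr/goleveldb/leveldb/opt"
    )

    // openOrRecover opens the DB and, if the open fails with a corruption
    // error, falls back to leveldb.RecoverFile to rebuild the manifest from
    // the existing tables.
    func openOrRecover(path string, o *opt.Options) (*leveldb.DB, error) {
        db, err := leveldb.OpenFile(path, o)
        if errors.IsCorrupted(err) {
            db, err = leveldb.RecoverFile(path, o)
        }
        if err != nil {
            return nil, err
        }
        return db, nil
    }
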
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go
new file mode 100644
index 00000000..e961e420
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go
@@ -0,0 +1,31 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/filter"
+)
+
+type iFilter struct {
+ filter.Filter
+}
+
+func (f iFilter) Contains(filter, key []byte) bool {
+ return f.Filter.Contains(filter, internalKey(key).ukey())
+}
+
+func (f iFilter) NewGenerator() filter.FilterGenerator {
+ return iFilterGenerator{f.Filter.NewGenerator()}
+}
+
+type iFilterGenerator struct {
+ filter.FilterGenerator
+}
+
+func (g iFilterGenerator) Add(key []byte) {
+ g.FilterGenerator.Add(internalKey(key).ukey())
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go
new file mode 100644
index 00000000..bab0e997
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go
@@ -0,0 +1,116 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package filter
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+func bloomHash(key []byte) uint32 {
+ return util.Hash(key, 0xbc9f1d34)
+}
+
+type bloomFilter int
+
+// The bloom filter serializes its parameters and is backward compatible
+// with respect to them. Therefore, its parameters are not added to its
+// name.
+func (bloomFilter) Name() string {
+ return "leveldb.BuiltinBloomFilter"
+}
+
+func (f bloomFilter) Contains(filter, key []byte) bool {
+ nBytes := len(filter) - 1
+ if nBytes < 1 {
+ return false
+ }
+ nBits := uint32(nBytes * 8)
+
+ // Use the encoded k so that we can read filters generated by
+ // bloom filters created using different parameters.
+ k := filter[nBytes]
+ if k > 30 {
+ // Reserved for potentially new encodings for short bloom filters.
+ // Consider it a match.
+ return true
+ }
+
+ kh := bloomHash(key)
+ delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
+ for j := uint8(0); j < k; j++ {
+ bitpos := kh % nBits
+ if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 {
+ return false
+ }
+ kh += delta
+ }
+ return true
+}
+
+func (f bloomFilter) NewGenerator() FilterGenerator {
+ // Round down to reduce probing cost a little bit.
+ k := uint8(f * 69 / 100) // 0.69 =~ ln(2)
+ if k < 1 {
+ k = 1
+ } else if k > 30 {
+ k = 30
+ }
+ return &bloomFilterGenerator{
+ n: int(f),
+ k: k,
+ }
+}
+
+type bloomFilterGenerator struct {
+ n int
+ k uint8
+
+ keyHashes []uint32
+}
+
+func (g *bloomFilterGenerator) Add(key []byte) {
+ // Use double-hashing to generate a sequence of hash values.
+ // See analysis in [Kirsch,Mitzenmacher 2006].
+ g.keyHashes = append(g.keyHashes, bloomHash(key))
+}
+
+func (g *bloomFilterGenerator) Generate(b Buffer) {
+ // Compute bloom filter size (in both bits and bytes)
+ nBits := uint32(len(g.keyHashes) * g.n)
+ // For small n, we can see a very high false positive rate. Fix it
+ // by enforcing a minimum bloom filter length.
+ if nBits < 64 {
+ nBits = 64
+ }
+ nBytes := (nBits + 7) / 8
+ nBits = nBytes * 8
+
+ dest := b.Alloc(int(nBytes) + 1)
+ dest[nBytes] = g.k
+ for _, kh := range g.keyHashes {
+ delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
+ for j := uint8(0); j < g.k; j++ {
+ bitpos := kh % nBits
+ dest[bitpos/8] |= (1 << (bitpos % 8))
+ kh += delta
+ }
+ }
+
+ g.keyHashes = g.keyHashes[:0]
+}
+
+// NewBloomFilter creates a new initialized bloom filter for given
+// bitsPerKey.
+//
+// Since bitsPerKey is persisted individually for each bloom filter
+// serialization, bloom filters are backwards compatible with respect to
+// changing bitsPerKey. This means that no big performance penalty will
+// be experienced when changing the parameter. See documentation for
+// opt.Options.Filter for more information.
+func NewBloomFilter(bitsPerKey int) Filter {
+ return bloomFilter(bitsPerKey)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go
new file mode 100644
index 00000000..7a925c5a
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package filter provides an interface and implementations of probabilistic
+// data structures.
+//
+// The filter is responsible for creating a small filter from a set of keys.
+// These filters will then be used to test whether a key is a member of the set.
+// In many cases, a filter can cut down the number of disk seeks from a
+// handful to a single disk seek per DB.Get call.
+package filter
+
+// Buffer is the interface that wraps basic Alloc, Write and WriteByte methods.
+type Buffer interface {
+ // Alloc allocates an n-byte slice from the buffer. This also advances the
+ // write offset.
+ Alloc(n int) []byte
+
+ // Write appends the contents of p to the buffer.
+ Write(p []byte) (n int, err error)
+
+ // WriteByte appends the byte c to the buffer.
+ WriteByte(c byte) error
+}
+
+// Filter is the filter.
+type Filter interface {
+ // Name returns the name of this policy.
+ //
+ // Note that if the filter encoding changes in an incompatible way,
+ // the name returned by this method must be changed. Otherwise, old
+ // incompatible filters may be passed to methods of this type.
+ Name() string
+
+ // NewGenerator creates a new filter generator.
+ NewGenerator() FilterGenerator
+
+ // Contains returns true if the filter contains the given key.
+ //
+ // The filter is a filter generated by the filter generator.
+ Contains(filter, key []byte) bool
+}
+
+// FilterGenerator is the filter generator.
+type FilterGenerator interface {
+ // Add adds a key to the filter generator.
+ //
+ // The key may become invalid after the call to this method ends, therefore
+ // the key must be copied if the implementation requires keeping it for
+ // later use. The key should not be modified directly; doing so may cause
+ // undefined results.
+ Add(key []byte)
+
+ // Generate generates the filter based on the keys passed so far. After a call
+ // to Generate the filter generator may be reset, depending on the implementation.
+ Generate(b Buffer)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
new file mode 100644
index 00000000..a23ab05f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go
@@ -0,0 +1,184 @@
+// Copyright (c) 2014, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// BasicArray is the interface that wraps basic Len and Search method.
+type BasicArray interface {
+ // Len returns length of the array.
+ Len() int
+
+ // Search finds the smallest index that points to a key that is greater
+ // than or equal to the given key.
+ Search(key []byte) int
+}
+
+// Array is the interface that wraps BasicArray and basic Index method.
+type Array interface {
+ BasicArray
+
+ // Index returns the key/value pair at index i.
+ Index(i int) (key, value []byte)
+}
+
+// ArrayIndexer is the interface that wraps BasicArray and the basic Get method.
+type ArrayIndexer interface {
+ BasicArray
+
+ // Get returns a new data iterator at index i.
+ Get(i int) Iterator
+}
+
+type basicArrayIterator struct {
+ util.BasicReleaser
+ array BasicArray
+ pos int
+ err error
+}
+
+func (i *basicArrayIterator) Valid() bool {
+ return i.pos >= 0 && i.pos < i.array.Len() && !i.Released()
+}
+
+func (i *basicArrayIterator) First() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.array.Len() == 0 {
+ i.pos = -1
+ return false
+ }
+ i.pos = 0
+ return true
+}
+
+func (i *basicArrayIterator) Last() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ n := i.array.Len()
+ if n == 0 {
+ i.pos = 0
+ return false
+ }
+ i.pos = n - 1
+ return true
+}
+
+func (i *basicArrayIterator) Seek(key []byte) bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ n := i.array.Len()
+ if n == 0 {
+ i.pos = 0
+ return false
+ }
+ i.pos = i.array.Search(key)
+ if i.pos >= n {
+ return false
+ }
+ return true
+}
+
+func (i *basicArrayIterator) Next() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.pos++
+ if n := i.array.Len(); i.pos >= n {
+ i.pos = n
+ return false
+ }
+ return true
+}
+
+func (i *basicArrayIterator) Prev() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.pos--
+ if i.pos < 0 {
+ i.pos = -1
+ return false
+ }
+ return true
+}
+
+func (i *basicArrayIterator) Error() error { return i.err }
+
+type arrayIterator struct {
+ basicArrayIterator
+ array Array
+ pos int
+ key, value []byte
+}
+
+func (i *arrayIterator) updateKV() {
+ if i.pos == i.basicArrayIterator.pos {
+ return
+ }
+ i.pos = i.basicArrayIterator.pos
+ if i.Valid() {
+ i.key, i.value = i.array.Index(i.pos)
+ } else {
+ i.key = nil
+ i.value = nil
+ }
+}
+
+func (i *arrayIterator) Key() []byte {
+ i.updateKV()
+ return i.key
+}
+
+func (i *arrayIterator) Value() []byte {
+ i.updateKV()
+ return i.value
+}
+
+type arrayIteratorIndexer struct {
+ basicArrayIterator
+ array ArrayIndexer
+}
+
+func (i *arrayIteratorIndexer) Get() Iterator {
+ if i.Valid() {
+ return i.array.Get(i.basicArrayIterator.pos)
+ }
+ return nil
+}
+
+// NewArrayIterator returns an iterator from the given array.
+func NewArrayIterator(array Array) Iterator {
+ return &arrayIterator{
+ basicArrayIterator: basicArrayIterator{array: array, pos: -1},
+ array: array,
+ pos: -1,
+ }
+}
+
+// NewArrayIndexer returns an index iterator from the given array.
+func NewArrayIndexer(array ArrayIndexer) IteratorIndexer {
+ return &arrayIteratorIndexer{
+ basicArrayIterator: basicArrayIterator{array: array, pos: -1},
+ array: array,
+ }
+}
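
A short sketch of satisfying the Array interface above with a key-sorted string slice and walking it via NewArrayIterator; the type, data, and helper are invented for illustration:

    package example

    import (
        "sort"

        "github.com/syndtr/goleveldb/leveldb/iterator"
    )

    // sortedKV is a toy Array backed by parallel, key-sorted slices.
    type sortedKV struct {
        keys, values []string
    }

    func (a sortedKV) Len() int { return len(a.keys) }

    // Search returns the smallest index whose key is >= the given key,
    // as required by BasicArray.
    func (a sortedKV) Search(key []byte) int {
        return sort.SearchStrings(a.keys, string(key))
    }

    func (a sortedKV) Index(i int) (key, value []byte) {
        return []byte(a.keys[i]), []byte(a.values[i])
    }

    // keysFrom collects every key at or after start, using the standard
    // seek-then-iterate pattern.
    func keysFrom(a sortedKV, start string) []string {
        iter := iterator.NewArrayIterator(a)
        defer iter.Release()
        var out []string
        for ok := iter.Seek([]byte(start)); ok; ok = iter.Next() {
            out = append(out, string(iter.Key()))
        }
        return out
    }
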
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
new file mode 100644
index 00000000..939adbb9
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go
@@ -0,0 +1,242 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// IteratorIndexer is the interface that wraps CommonIterator and basic Get
+// method. IteratorIndexer provides index for indexed iterator.
+type IteratorIndexer interface {
+ CommonIterator
+
+ // Get returns a new data iterator for the current position, or nil if
+ // done.
+ Get() Iterator
+}
+
+type indexedIterator struct {
+ util.BasicReleaser
+ index IteratorIndexer
+ strict bool
+
+ data Iterator
+ err error
+ errf func(err error)
+ closed bool
+}
+
+func (i *indexedIterator) setData() {
+ if i.data != nil {
+ i.data.Release()
+ }
+ i.data = i.index.Get()
+}
+
+func (i *indexedIterator) clearData() {
+ if i.data != nil {
+ i.data.Release()
+ }
+ i.data = nil
+}
+
+func (i *indexedIterator) indexErr() {
+ if err := i.index.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ i.err = err
+ }
+}
+
+func (i *indexedIterator) dataErr() bool {
+ if err := i.data.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ if i.strict || !errors.IsCorrupted(err) {
+ i.err = err
+ return true
+ }
+ }
+ return false
+}
+
+func (i *indexedIterator) Valid() bool {
+ return i.data != nil && i.data.Valid()
+}
+
+func (i *indexedIterator) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.First() {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ return i.Next()
+}
+
+func (i *indexedIterator) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.Last() {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ if !i.data.Last() {
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ return i.Prev()
+ }
+ return true
+}
+
+func (i *indexedIterator) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if !i.index.Seek(key) {
+ i.indexErr()
+ i.clearData()
+ return false
+ }
+ i.setData()
+ if !i.data.Seek(key) {
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ return i.Next()
+ }
+ return true
+}
+
+func (i *indexedIterator) Next() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch {
+ case i.data != nil && !i.data.Next():
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ fallthrough
+ case i.data == nil:
+ if !i.index.Next() {
+ i.indexErr()
+ return false
+ }
+ i.setData()
+ return i.Next()
+ }
+ return true
+}
+
+func (i *indexedIterator) Prev() bool {
+ if i.err != nil {
+ return false
+ } else if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch {
+ case i.data != nil && !i.data.Prev():
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ fallthrough
+ case i.data == nil:
+ if !i.index.Prev() {
+ i.indexErr()
+ return false
+ }
+ i.setData()
+ if !i.data.Last() {
+ if i.dataErr() {
+ return false
+ }
+ i.clearData()
+ return i.Prev()
+ }
+ }
+ return true
+}
+
+func (i *indexedIterator) Key() []byte {
+ if i.data == nil {
+ return nil
+ }
+ return i.data.Key()
+}
+
+func (i *indexedIterator) Value() []byte {
+ if i.data == nil {
+ return nil
+ }
+ return i.data.Value()
+}
+
+func (i *indexedIterator) Release() {
+ i.clearData()
+ i.index.Release()
+ i.BasicReleaser.Release()
+}
+
+func (i *indexedIterator) Error() error {
+ if i.err != nil {
+ return i.err
+ }
+ if err := i.index.Error(); err != nil {
+ return err
+ }
+ return nil
+}
+
+func (i *indexedIterator) SetErrorCallback(f func(err error)) {
+ i.errf = f
+}
+
+// NewIndexedIterator returns an 'indexed iterator'. An index is an iterator
+// that returns another iterator, a 'data iterator'. A 'data iterator' is the
+// iterator that contains actual key/value pairs.
+//
+// If strict is true then any 'corruption errors' (i.e. errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the 'indexed iterator'; otherwise the iterator will
+// continue to the next 'data iterator'. Corruption on the 'index iterator' will not be
+// ignored and will halt the iterator.
+func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator {
+ return &indexedIterator{index: index, strict: strict}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
new file mode 100644
index 00000000..96fb0f68
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go
@@ -0,0 +1,132 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package iterator provides interface and implementation to traverse over
+// contents of a database.
+package iterator
+
+import (
+ "errors"
+
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+var (
+ ErrIterReleased = errors.New("leveldb/iterator: iterator released")
+)
+
+// IteratorSeeker is the interface that wraps the 'seeks method'.
+type IteratorSeeker interface {
+ // First moves the iterator to the first key/value pair. If the iterator
+ // only contains one key/value pair then First and Last would move
+ // to the same key/value pair.
+ // It returns whether such a pair exists.
+ First() bool
+
+ // Last moves the iterator to the last key/value pair. If the iterator
+ // only contains one key/value pair then First and Last would move
+ // to the same key/value pair.
+ // It returns whether such a pair exists.
+ Last() bool
+
+ // Seek moves the iterator to the first key/value pair whose key is greater
+ // than or equal to the given key.
+ // It returns whether such a pair exists.
+ //
+ // It is safe to modify the contents of the argument after Seek returns.
+ Seek(key []byte) bool
+
+ // Next moves the iterator to the next key/value pair.
+ // It returns false if the iterator is exhausted.
+ Next() bool
+
+ // Prev moves the iterator to the previous key/value pair.
+ // It returns false if the iterator is exhausted.
+ Prev() bool
+}
+
+// CommonIterator is the interface that wraps common iterator methods.
+type CommonIterator interface {
+ IteratorSeeker
+
+ // util.Releaser is the interface that wraps basic Release method.
+ // When called Release will releases any resources associated with the
+ // iterator.
+ util.Releaser
+
+ // util.ReleaseSetter is the interface that wraps the basic SetReleaser
+ // method.
+ util.ReleaseSetter
+
+ // TODO: Remove this when ready.
+ Valid() bool
+
+ // Error returns any accumulated error. Exhausting all the key/value pairs
+ // is not considered to be an error.
+ Error() error
+}
+
+// Iterator iterates over a DB's key/value pairs in key order.
+//
+// When it encounters an error, any 'seeks method' will return false and will
+// yield no key/value pairs. The error can be queried by calling the Error
+// method. Calling Release is still necessary.
+//
+// An iterator must be released after use, but it is not necessary to read
+// an iterator until exhaustion.
+// Also, an iterator is not necessarily safe for concurrent use, but it is
+// safe to use multiple iterators concurrently, with each in a dedicated
+// goroutine.
+type Iterator interface {
+ CommonIterator
+
+ // Key returns the key of the current key/value pair, or nil if done.
+ // The caller should not modify the contents of the returned slice, and
+ // its contents may change on the next call to any 'seeks method'.
+ Key() []byte
+
+ // Value returns the value of the current key/value pair, or nil if done.
+ // The caller should not modify the contents of the returned slice, and
+ // its contents may change on the next call to any 'seeks method'.
+ Value() []byte
+}
+
+// ErrorCallbackSetter is the interface that wraps basic SetErrorCallback
+// method.
+//
+// ErrorCallbackSetter is implemented by the indexed and merged iterators.
+type ErrorCallbackSetter interface {
+ // SetErrorCallback allows setting an error callback for the corresponding
+ // iterator. Use nil to clear the callback.
+ SetErrorCallback(f func(err error))
+}
+
+type emptyIterator struct {
+ util.BasicReleaser
+ err error
+}
+
+func (i *emptyIterator) rErr() {
+ if i.err == nil && i.Released() {
+ i.err = ErrIterReleased
+ }
+}
+
+func (*emptyIterator) Valid() bool { return false }
+func (i *emptyIterator) First() bool { i.rErr(); return false }
+func (i *emptyIterator) Last() bool { i.rErr(); return false }
+func (i *emptyIterator) Seek(key []byte) bool { i.rErr(); return false }
+func (i *emptyIterator) Next() bool { i.rErr(); return false }
+func (i *emptyIterator) Prev() bool { i.rErr(); return false }
+func (*emptyIterator) Key() []byte { return nil }
+func (*emptyIterator) Value() []byte { return nil }
+func (i *emptyIterator) Error() error { return i.err }
+
+// NewEmptyIterator creates an empty iterator. The err parameter can be
+// nil, but if not nil the given err will be returned by Error method.
+func NewEmptyIterator(err error) Iterator {
+ return &emptyIterator{err: err}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
new file mode 100644
index 00000000..1a7e29df
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go
@@ -0,0 +1,304 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package iterator
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type dir int
+
+const (
+ dirReleased dir = iota - 1
+ dirSOI
+ dirEOI
+ dirBackward
+ dirForward
+)
+
+type mergedIterator struct {
+ cmp comparer.Comparer
+ iters []Iterator
+ strict bool
+
+ keys [][]byte
+ index int
+ dir dir
+ err error
+ errf func(err error)
+ releaser util.Releaser
+}
+
+func assertKey(key []byte) []byte {
+ if key == nil {
+ panic("leveldb/iterator: nil key")
+ }
+ return key
+}
+
+func (i *mergedIterator) iterErr(iter Iterator) bool {
+ if err := iter.Error(); err != nil {
+ if i.errf != nil {
+ i.errf(err)
+ }
+ if i.strict || !errors.IsCorrupted(err) {
+ i.err = err
+ return true
+ }
+ }
+ return false
+}
+
+func (i *mergedIterator) Valid() bool {
+ return i.err == nil && i.dir > dirEOI
+}
+
+func (i *mergedIterator) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.First():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirSOI
+ return i.next()
+}
+
+func (i *mergedIterator) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.Last():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirEOI
+ return i.prev()
+}
+
+func (i *mergedIterator) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ for x, iter := range i.iters {
+ switch {
+ case iter.Seek(key):
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ i.dir = dirSOI
+ return i.next()
+}
+
+func (i *mergedIterator) next() bool {
+ var key []byte
+ if i.dir == dirForward {
+ key = i.keys[i.index]
+ }
+ for x, tkey := range i.keys {
+ if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) < 0) {
+ key = tkey
+ i.index = x
+ }
+ }
+ if key == nil {
+ i.dir = dirEOI
+ return false
+ }
+ i.dir = dirForward
+ return true
+}
+
+func (i *mergedIterator) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirSOI:
+ return i.First()
+ case dirBackward:
+ key := append([]byte{}, i.keys[i.index]...)
+ if !i.Seek(key) {
+ return false
+ }
+ return i.Next()
+ }
+
+ x := i.index
+ iter := i.iters[x]
+ switch {
+ case iter.Next():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ return i.next()
+}
+
+func (i *mergedIterator) prev() bool {
+ var key []byte
+ if i.dir == dirBackward {
+ key = i.keys[i.index]
+ }
+ for x, tkey := range i.keys {
+ if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) > 0) {
+ key = tkey
+ i.index = x
+ }
+ }
+ if key == nil {
+ i.dir = dirSOI
+ return false
+ }
+ i.dir = dirBackward
+ return true
+}
+
+func (i *mergedIterator) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ switch i.dir {
+ case dirEOI:
+ return i.Last()
+ case dirForward:
+ key := append([]byte{}, i.keys[i.index]...)
+ for x, iter := range i.iters {
+ if x == i.index {
+ continue
+ }
+ seek := iter.Seek(key)
+ switch {
+ case seek && iter.Prev(), !seek && iter.Last():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ }
+ }
+
+ x := i.index
+ iter := i.iters[x]
+ switch {
+ case iter.Prev():
+ i.keys[x] = assertKey(iter.Key())
+ case i.iterErr(iter):
+ return false
+ default:
+ i.keys[x] = nil
+ }
+ return i.prev()
+}
+
+func (i *mergedIterator) Key() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.keys[i.index]
+}
+
+func (i *mergedIterator) Value() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.iters[i.index].Value()
+}
+
+func (i *mergedIterator) Release() {
+ if i.dir != dirReleased {
+ i.dir = dirReleased
+ for _, iter := range i.iters {
+ iter.Release()
+ }
+ i.iters = nil
+ i.keys = nil
+ if i.releaser != nil {
+ i.releaser.Release()
+ i.releaser = nil
+ }
+ }
+}
+
+func (i *mergedIterator) SetReleaser(releaser util.Releaser) {
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
+}
+
+func (i *mergedIterator) Error() error {
+ return i.err
+}
+
+func (i *mergedIterator) SetErrorCallback(f func(err error)) {
+ i.errf = f
+}
+
+// NewMergedIterator returns an iterator that merges its input. Walking the
+// resultant iterator will return all key/value pairs of all input iterators
+// in strictly increasing key order, as defined by cmp.
+// The input's key ranges may overlap, but there are assumed to be no duplicate
+// keys: if iters[i] contains a key k then iters[j] will not contain that key k.
+// None of the iters may be nil.
+//
+// If strict is true then any 'corruption errors' (i.e. errors.IsCorrupted(err) == true)
+// won't be ignored and will halt the merged iterator; otherwise the iterator will
+// continue to the next input iterator.
+func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator {
+ return &mergedIterator{
+ iters: iters,
+ cmp: cmp,
+ strict: strict,
+ keys: make([][]byte, len(iters)),
+ }
+}
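
As an aside, a minimal sketch of how the merged iterator above can be driven, assuming the vendored packages are imported under their usual goleveldb paths; the two memdb tables and their keys are made up for illustration:

package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/comparer"
	"github.com/syndtr/goleveldb/leveldb/iterator"
	"github.com/syndtr/goleveldb/leveldb/memdb"
)

func main() {
	// Two in-memory tables with disjoint key sets, as NewMergedIterator requires.
	a := memdb.New(comparer.DefaultComparer, 0)
	b := memdb.New(comparer.DefaultComparer, 0)
	a.Put([]byte("a"), []byte("1"))
	a.Put([]byte("c"), []byte("3"))
	b.Put([]byte("b"), []byte("2"))

	iters := []iterator.Iterator{a.NewIterator(nil), b.NewIterator(nil)}
	merged := iterator.NewMergedIterator(iters, comparer.DefaultComparer, true)
	defer merged.Release()

	// Prints a=1, b=2, c=3 in key order.
	for merged.Next() {
		fmt.Printf("%s=%s\n", merged.Key(), merged.Value())
	}
}
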
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go
new file mode 100644
index 00000000..d094c3d0
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go
@@ -0,0 +1,524 @@
+// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record.go?r=1d5ccbe03246da926391ee12d1c6caae054ff4b0
+// License, author and contributor information can be found at the URLs below, respectively:
+// https://code.google.com/p/leveldb-go/source/browse/LICENSE
+// https://code.google.com/p/leveldb-go/source/browse/AUTHORS
+// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS
+
+// Package journal reads and writes sequences of journals. Each journal is a stream
+// of bytes that completes before the next journal starts.
+//
+// When reading, call Next to obtain an io.Reader for the next journal. Next will
+// return io.EOF when there are no more journals. It is valid to call Next
+// without reading the current journal to exhaustion.
+//
+// When writing, call Next to obtain an io.Writer for the next journal. Calling
+// Next finishes the current journal. Call Close to finish the final journal.
+//
+// Optionally, call Flush to finish the current journal and flush the underlying
+// writer without starting a new journal. To start a new journal after flushing,
+// call Next.
+//
+// Neither Readers nor Writers are safe for concurrent use.
+//
+// Example code:
+// func read(r io.Reader) ([]string, error) {
+// var ss []string
+// journals := journal.NewReader(r, nil, true, true)
+// for {
+// j, err := journals.Next()
+// if err == io.EOF {
+// break
+// }
+// if err != nil {
+// return nil, err
+// }
+// s, err := ioutil.ReadAll(j)
+// if err != nil {
+// return nil, err
+// }
+// ss = append(ss, string(s))
+// }
+// return ss, nil
+// }
+//
+// func write(w io.Writer, ss []string) error {
+// journals := journal.NewWriter(w)
+// for _, s := range ss {
+// j, err := journals.Next()
+// if err != nil {
+// return err
+// }
+// if _, err := j.Write([]byte(s)); err != nil {
+// return err
+// }
+// }
+// return journals.Close()
+// }
+//
+// The wire format is that the stream is divided into 32KiB blocks, and each
+// block contains a number of tightly packed chunks. Chunks cannot cross block
+// boundaries. The last block may be shorter than 32 KiB. Any unused bytes in a
+// block must be zero.
+//
+// A journal maps to one or more chunks. Each chunk has a 7 byte header (a 4
+// byte checksum, a 2 byte little-endian uint16 length, and a 1 byte chunk type)
+// followed by a payload. The checksum is over the chunk type and the payload.
+//
+// There are four chunk types: whether the chunk is the full journal, or the
+// first, middle or last chunk of a multi-chunk journal. A multi-chunk journal
+// has one first chunk, zero or more middle chunks, and one last chunk.
+//
+// The wire format allows for limited recovery in the face of data corruption:
+// on a format error (such as a checksum mismatch), the reader moves to the
+// next block and looks for the next full or first chunk.
+package journal
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// These constants are part of the wire format and should not be changed.
+const (
+ fullChunkType = 1
+ firstChunkType = 2
+ middleChunkType = 3
+ lastChunkType = 4
+)
+
+const (
+ blockSize = 32 * 1024
+ headerSize = 7
+)
+
+type flusher interface {
+ Flush() error
+}
+
+// ErrCorrupted is the error type generated by a corrupted block or chunk.
+type ErrCorrupted struct {
+ Size int
+ Reason string
+}
+
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size)
+}
+
+// Dropper is the interface that wraps the basic Drop method. The Drop
+// method is called when the journal reader drops a block or chunk.
+type Dropper interface {
+ Drop(err error)
+}
+
+// Reader reads journals from an underlying io.Reader.
+type Reader struct {
+ // r is the underlying reader.
+ r io.Reader
+ // the dropper.
+ dropper Dropper
+ // strict flag.
+ strict bool
+ // checksum flag.
+ checksum bool
+ // seq is the sequence number of the current journal.
+ seq int
+ // buf[i:j] is the unread portion of the current chunk's payload.
+ // The low bound, i, excludes the chunk header.
+ i, j int
+ // n is the number of bytes of buf that are valid. Once reading has started,
+ // only the final block can have n < blockSize.
+ n int
+ // last is whether the current chunk is the last chunk of the journal.
+ last bool
+ // err is any accumulated error.
+ err error
+ // buf is the buffer.
+ buf [blockSize]byte
+}
+
+// NewReader returns a new reader. The dropper may be nil, and if
+// strict is true then a corrupted or invalid chunk will halt the journal
+// reader entirely.
+func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader {
+ return &Reader{
+ r: r,
+ dropper: dropper,
+ strict: strict,
+ checksum: checksum,
+ last: true,
+ }
+}
+
+var errSkip = errors.New("leveldb/journal: skipped")
+
+func (r *Reader) corrupt(n int, reason string, skip bool) error {
+ if r.dropper != nil {
+ r.dropper.Drop(&ErrCorrupted{n, reason})
+ }
+ if r.strict && !skip {
+ r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason})
+ return r.err
+ }
+ return errSkip
+}
+
+// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the
+// next block into the buffer if necessary.
+func (r *Reader) nextChunk(first bool) error {
+ for {
+ if r.j+headerSize <= r.n {
+ checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4])
+ length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6])
+ chunkType := r.buf[r.j+6]
+ unprocBlock := r.n - r.j
+ if checksum == 0 && length == 0 && chunkType == 0 {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "zero header", false)
+ }
+ if chunkType < fullChunkType || chunkType > lastChunkType {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, fmt.Sprintf("invalid chunk type %#x", chunkType), false)
+ }
+ r.i = r.j + headerSize
+ r.j = r.j + headerSize + int(length)
+ if r.j > r.n {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "chunk length overflows block", false)
+ } else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() {
+ // Drop entire block.
+ r.i = r.n
+ r.j = r.n
+ return r.corrupt(unprocBlock, "checksum mismatch", false)
+ }
+ if first && chunkType != fullChunkType && chunkType != firstChunkType {
+ chunkLength := (r.j - r.i) + headerSize
+ r.i = r.j
+ // Report the error, but skip it.
+ return r.corrupt(chunkLength, "orphan chunk", true)
+ }
+ r.last = chunkType == fullChunkType || chunkType == lastChunkType
+ return nil
+ }
+
+ // The last block.
+ if r.n < blockSize && r.n > 0 {
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
+ }
+ r.err = io.EOF
+ return r.err
+ }
+
+ // Read block.
+ n, err := io.ReadFull(r.r, r.buf[:])
+ if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+ return err
+ }
+ if n == 0 {
+ if !first {
+ return r.corrupt(0, "missing chunk part", false)
+ }
+ r.err = io.EOF
+ return r.err
+ }
+ r.i, r.j, r.n = 0, 0, n
+ }
+}
+
+// Next returns a reader for the next journal. It returns io.EOF if there are no
+// more journals. The reader returned becomes stale after the next Next call,
+// and should no longer be used. If strict is false, the reader will return
+// io.ErrUnexpectedEOF when it encounters a corrupted journal.
+func (r *Reader) Next() (io.Reader, error) {
+ r.seq++
+ if r.err != nil {
+ return nil, r.err
+ }
+ r.i = r.j
+ for {
+ if err := r.nextChunk(true); err == nil {
+ break
+ } else if err != errSkip {
+ return nil, err
+ }
+ }
+ return &singleReader{r, r.seq, nil}, nil
+}
+
+// Reset resets the journal reader, allowing it to be reused. Reset returns the
+// last accumulated error.
+func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error {
+ r.seq++
+ err := r.err
+ r.r = reader
+ r.dropper = dropper
+ r.strict = strict
+ r.checksum = checksum
+ r.i = 0
+ r.j = 0
+ r.n = 0
+ r.last = true
+ r.err = nil
+ return err
+}
+
+type singleReader struct {
+ r *Reader
+ seq int
+ err error
+}
+
+func (x *singleReader) Read(p []byte) (int, error) {
+ r := x.r
+ if r.seq != x.seq {
+ return 0, errors.New("leveldb/journal: stale reader")
+ }
+ if x.err != nil {
+ return 0, x.err
+ }
+ if r.err != nil {
+ return 0, r.err
+ }
+ for r.i == r.j {
+ if r.last {
+ return 0, io.EOF
+ }
+ x.err = r.nextChunk(false)
+ if x.err != nil {
+ if x.err == errSkip {
+ x.err = io.ErrUnexpectedEOF
+ }
+ return 0, x.err
+ }
+ }
+ n := copy(p, r.buf[r.i:r.j])
+ r.i += n
+ return n, nil
+}
+
+func (x *singleReader) ReadByte() (byte, error) {
+ r := x.r
+ if r.seq != x.seq {
+ return 0, errors.New("leveldb/journal: stale reader")
+ }
+ if x.err != nil {
+ return 0, x.err
+ }
+ if r.err != nil {
+ return 0, r.err
+ }
+ for r.i == r.j {
+ if r.last {
+ return 0, io.EOF
+ }
+ x.err = r.nextChunk(false)
+ if x.err != nil {
+ if x.err == errSkip {
+ x.err = io.ErrUnexpectedEOF
+ }
+ return 0, x.err
+ }
+ }
+ c := r.buf[r.i]
+ r.i++
+ return c, nil
+}
+
+// Writer writes journals to an underlying io.Writer.
+type Writer struct {
+ // w is the underlying writer.
+ w io.Writer
+ // seq is the sequence number of the current journal.
+ seq int
+ // f is w as a flusher.
+ f flusher
+ // buf[i:j] is the bytes that will become the current chunk.
+ // The low bound, i, includes the chunk header.
+ i, j int
+ // buf[:written] has already been written to w.
+ // written is zero unless Flush has been called.
+ written int
+ // first is whether the current chunk is the first chunk of the journal.
+ first bool
+ // pending is whether a chunk is buffered but not yet written.
+ pending bool
+ // err is any accumulated error.
+ err error
+ // buf is the buffer.
+ buf [blockSize]byte
+}
+
+// NewWriter returns a new Writer.
+func NewWriter(w io.Writer) *Writer {
+ f, _ := w.(flusher)
+ return &Writer{
+ w: w,
+ f: f,
+ }
+}
+
+// fillHeader fills in the header for the pending chunk.
+func (w *Writer) fillHeader(last bool) {
+ if w.i+headerSize > w.j || w.j > blockSize {
+ panic("leveldb/journal: bad writer state")
+ }
+ if last {
+ if w.first {
+ w.buf[w.i+6] = fullChunkType
+ } else {
+ w.buf[w.i+6] = lastChunkType
+ }
+ } else {
+ if w.first {
+ w.buf[w.i+6] = firstChunkType
+ } else {
+ w.buf[w.i+6] = middleChunkType
+ }
+ }
+ binary.LittleEndian.PutUint32(w.buf[w.i+0:w.i+4], util.NewCRC(w.buf[w.i+6:w.j]).Value())
+ binary.LittleEndian.PutUint16(w.buf[w.i+4:w.i+6], uint16(w.j-w.i-headerSize))
+}
+
+// writeBlock writes the buffered block to the underlying writer, and reserves
+// space for the next chunk's header.
+func (w *Writer) writeBlock() {
+ _, w.err = w.w.Write(w.buf[w.written:])
+ w.i = 0
+ w.j = headerSize
+ w.written = 0
+}
+
+// writePending finishes the current journal and writes the buffer to the
+// underlying writer.
+func (w *Writer) writePending() {
+ if w.err != nil {
+ return
+ }
+ if w.pending {
+ w.fillHeader(true)
+ w.pending = false
+ }
+ _, w.err = w.w.Write(w.buf[w.written:w.j])
+ w.written = w.j
+}
+
+// Close finishes the current journal and closes the writer.
+func (w *Writer) Close() error {
+ w.seq++
+ w.writePending()
+ if w.err != nil {
+ return w.err
+ }
+ w.err = errors.New("leveldb/journal: closed Writer")
+ return nil
+}
+
+// Flush finishes the current journal, writes to the underlying writer, and
+// flushes it if that writer implements interface{ Flush() error }.
+func (w *Writer) Flush() error {
+ w.seq++
+ w.writePending()
+ if w.err != nil {
+ return w.err
+ }
+ if w.f != nil {
+ w.err = w.f.Flush()
+ return w.err
+ }
+ return nil
+}
+
+// Reset resets the journal writer, allowing it to be reused. Reset will also
+// finish and write out the current journal if one is pending.
+func (w *Writer) Reset(writer io.Writer) (err error) {
+ w.seq++
+ if w.err == nil {
+ w.writePending()
+ err = w.err
+ }
+ w.w = writer
+ w.f, _ = writer.(flusher)
+ w.i = 0
+ w.j = 0
+ w.written = 0
+ w.first = false
+ w.pending = false
+ w.err = nil
+ return
+}
+
+// Next returns a writer for the next journal. The writer returned becomes stale
+// after the next Close, Flush or Next call, and should no longer be used.
+func (w *Writer) Next() (io.Writer, error) {
+ w.seq++
+ if w.err != nil {
+ return nil, w.err
+ }
+ if w.pending {
+ w.fillHeader(true)
+ }
+ w.i = w.j
+ w.j = w.j + headerSize
+ // Check if there is room in the block for the header.
+ if w.j > blockSize {
+ // Fill in the rest of the block with zeroes.
+ for k := w.i; k < blockSize; k++ {
+ w.buf[k] = 0
+ }
+ w.writeBlock()
+ if w.err != nil {
+ return nil, w.err
+ }
+ }
+ w.first = true
+ w.pending = true
+ return singleWriter{w, w.seq}, nil
+}
+
+type singleWriter struct {
+ w *Writer
+ seq int
+}
+
+func (x singleWriter) Write(p []byte) (int, error) {
+ w := x.w
+ if w.seq != x.seq {
+ return 0, errors.New("leveldb/journal: stale writer")
+ }
+ if w.err != nil {
+ return 0, w.err
+ }
+ n0 := len(p)
+ for len(p) > 0 {
+ // Write a block, if it is full.
+ if w.j == blockSize {
+ w.fillHeader(false)
+ w.writeBlock()
+ if w.err != nil {
+ return 0, w.err
+ }
+ w.first = false
+ }
+ // Copy bytes into the buffer.
+ n := copy(w.buf[w.j:], p)
+ w.j += n
+ p = p[n:]
+ }
+ return n0, nil
+}
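
For orientation, a small self-contained sketch of the Writer/Reader pair defined above, round-tripping two journals through a bytes.Buffer; the payload strings are arbitrary:

package main

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"

	"github.com/syndtr/goleveldb/leveldb/journal"
)

func main() {
	var buf bytes.Buffer

	// Write two journals; each Next call finishes the previous journal.
	w := journal.NewWriter(&buf)
	for _, s := range []string{"first", "second"} {
		jw, err := w.Next()
		if err != nil {
			panic(err)
		}
		if _, err := jw.Write([]byte(s)); err != nil {
			panic(err)
		}
	}
	if err := w.Close(); err != nil {
		panic(err)
	}

	// Read them back; Next returns io.EOF once all journals are consumed.
	r := journal.NewReader(&buf, nil, true, true)
	for {
		jr, err := r.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		payload, err := ioutil.ReadAll(jr)
		if err != nil {
			panic(err)
		}
		fmt.Println(string(payload))
	}
}
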
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/key.go b/vendor/github.com/syndtr/goleveldb/leveldb/key.go
new file mode 100644
index 00000000..ad8f51ec
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/key.go
@@ -0,0 +1,143 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "encoding/binary"
+ "fmt"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrInternalKeyCorrupted records internal key corruption.
+type ErrInternalKeyCorrupted struct {
+ Ikey []byte
+ Reason string
+}
+
+func (e *ErrInternalKeyCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason)
+}
+
+func newErrInternalKeyCorrupted(ikey []byte, reason string) error {
+ return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason})
+}
+
+type keyType uint
+
+func (kt keyType) String() string {
+ switch kt {
+ case keyTypeDel:
+ return "d"
+ case keyTypeVal:
+ return "v"
+ }
+ return fmt.Sprintf("<invalid:%#x>", uint(kt))
+}
+
+// Value types encoded as the last component of internal keys.
+// Don't modify; these values are saved to disk.
+const (
+ keyTypeDel = keyType(0)
+ keyTypeVal = keyType(1)
+)
+
+// keyTypeSeek defines the keyType that should be passed when constructing an
+// internal key for seeking to a particular sequence number (since we
+// sort sequence numbers in decreasing order and the value type is
+// embedded as the low 8 bits in the sequence number in internal keys,
+// we need to use the highest-numbered ValueType, not the lowest).
+const keyTypeSeek = keyTypeVal
+
+const (
+ // Maximum possible value for a sequence number; the low 8 bits are
+ // used by the value type, so both can be packed together into a single
+ // 64-bit integer.
+ keyMaxSeq = (uint64(1) << 56) - 1
+ // Maximum value possible for packed sequence number and type.
+ keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek)
+)
+
+// Maximum number encoded in bytes.
+var keyMaxNumBytes = make([]byte, 8)
+
+func init() {
+ binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum)
+}
+
+type internalKey []byte
+
+func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey {
+ if seq > keyMaxSeq {
+ panic("leveldb: invalid sequence number")
+ } else if kt > keyTypeVal {
+ panic("leveldb: invalid type")
+ }
+
+ dst = ensureBuffer(dst, len(ukey)+8)
+ copy(dst, ukey)
+ binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt))
+ return internalKey(dst)
+}
+
+func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) {
+ if len(ik) < 8 {
+ return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length")
+ }
+ num := binary.LittleEndian.Uint64(ik[len(ik)-8:])
+ seq, kt = uint64(num>>8), keyType(num&0xff)
+ if kt > keyTypeVal {
+ return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type")
+ }
+ ukey = ik[:len(ik)-8]
+ return
+}
+
+func validInternalKey(ik []byte) bool {
+ _, _, _, err := parseInternalKey(ik)
+ return err == nil
+}
+
+func (ik internalKey) assert() {
+ if ik == nil {
+ panic("leveldb: nil internalKey")
+ }
+ if len(ik) < 8 {
+ panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik)))
+ }
+}
+
+func (ik internalKey) ukey() []byte {
+ ik.assert()
+ return ik[:len(ik)-8]
+}
+
+func (ik internalKey) num() uint64 {
+ ik.assert()
+ return binary.LittleEndian.Uint64(ik[len(ik)-8:])
+}
+
+func (ik internalKey) parseNum() (seq uint64, kt keyType) {
+ num := ik.num()
+ seq, kt = uint64(num>>8), keyType(num&0xff)
+ if kt > keyTypeVal {
+ panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt))
+ }
+ return
+}
+
+func (ik internalKey) String() string {
+ if ik == nil {
+ return "<nil>"
+ }
+
+ if ukey, seq, kt, err := parseInternalKey(ik); err == nil {
+ return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq)
+ }
+ return fmt.Sprintf("<invalid:%#x>", []byte(ik))
+}
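
The helpers above are unexported, so as a standalone illustration of the same encoding, here is the packing arithmetic (user key, then a little-endian uint64 holding seq<<8 | type) written out by hand; the key and sequence number are arbitrary:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	ukey := []byte("user-key")
	seq := uint64(42)
	kt := uint64(1) // keyTypeVal

	// Encode: ukey followed by 8 bytes of (seq<<8 | kt), little-endian,
	// mirroring makeInternalKey.
	ik := make([]byte, len(ukey)+8)
	copy(ik, ukey)
	binary.LittleEndian.PutUint64(ik[len(ukey):], seq<<8|kt)

	// Decode: split off the trailing 8 bytes and unpack, mirroring parseInternalKey.
	num := binary.LittleEndian.Uint64(ik[len(ik)-8:])
	fmt.Printf("ukey=%s seq=%d type=%d\n", ik[:len(ik)-8], num>>8, num&0xff)
}
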
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
new file mode 100644
index 00000000..824e47f5
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go
@@ -0,0 +1,479 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package memdb provides an in-memory key/value database implementation.
+package memdb
+
+import (
+ "math/rand"
+ "sync"
+
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Common errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ ErrIterReleased = errors.New("leveldb/memdb: iterator released")
+)
+
+const tMaxHeight = 12
+
+type dbIter struct {
+ util.BasicReleaser
+ p *DB
+ slice *util.Range
+ node int
+ forward bool
+ key, value []byte
+ err error
+}
+
+func (i *dbIter) fill(checkStart, checkLimit bool) bool {
+ if i.node != 0 {
+ n := i.p.nodeData[i.node]
+ m := n + i.p.nodeData[i.node+nKey]
+ i.key = i.p.kvData[n:m]
+ if i.slice != nil {
+ switch {
+ case checkLimit && i.slice.Limit != nil && i.p.cmp.Compare(i.key, i.slice.Limit) >= 0:
+ fallthrough
+ case checkStart && i.slice.Start != nil && i.p.cmp.Compare(i.key, i.slice.Start) < 0:
+ i.node = 0
+ goto bail
+ }
+ }
+ i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]]
+ return true
+ }
+bail:
+ i.key = nil
+ i.value = nil
+ return false
+}
+
+func (i *dbIter) Valid() bool {
+ return i.node != 0
+}
+
+func (i *dbIter) First() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.forward = true
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ if i.slice != nil && i.slice.Start != nil {
+ i.node, _ = i.p.findGE(i.slice.Start, false)
+ } else {
+ i.node = i.p.nodeData[nNext]
+ }
+ return i.fill(false, true)
+}
+
+func (i *dbIter) Last() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.forward = false
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ if i.slice != nil && i.slice.Limit != nil {
+ i.node = i.p.findLT(i.slice.Limit)
+ } else {
+ i.node = i.p.findLast()
+ }
+ return i.fill(true, false)
+}
+
+func (i *dbIter) Seek(key []byte) bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ i.forward = true
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ if i.slice != nil && i.slice.Start != nil && i.p.cmp.Compare(key, i.slice.Start) < 0 {
+ key = i.slice.Start
+ }
+ i.node, _ = i.p.findGE(key, false)
+ return i.fill(false, true)
+}
+
+func (i *dbIter) Next() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.node == 0 {
+ if !i.forward {
+ return i.First()
+ }
+ return false
+ }
+ i.forward = true
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ i.node = i.p.nodeData[i.node+nNext]
+ return i.fill(false, true)
+}
+
+func (i *dbIter) Prev() bool {
+ if i.Released() {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.node == 0 {
+ if i.forward {
+ return i.Last()
+ }
+ return false
+ }
+ i.forward = false
+ i.p.mu.RLock()
+ defer i.p.mu.RUnlock()
+ i.node = i.p.findLT(i.key)
+ return i.fill(true, false)
+}
+
+func (i *dbIter) Key() []byte {
+ return i.key
+}
+
+func (i *dbIter) Value() []byte {
+ return i.value
+}
+
+func (i *dbIter) Error() error { return i.err }
+
+func (i *dbIter) Release() {
+ if !i.Released() {
+ i.p = nil
+ i.node = 0
+ i.key = nil
+ i.value = nil
+ i.BasicReleaser.Release()
+ }
+}
+
+const (
+ nKV = iota
+ nKey
+ nVal
+ nHeight
+ nNext
+)
+
+// DB is an in-memory key/value database.
+type DB struct {
+ cmp comparer.BasicComparer
+ rnd *rand.Rand
+
+ mu sync.RWMutex
+ kvData []byte
+ // Node data:
+ // [0] : KV offset
+ // [1] : Key length
+ // [2] : Value length
+ // [3] : Height
+ // [3..height] : Next nodes
+ nodeData []int
+ prevNode [tMaxHeight]int
+ maxHeight int
+ n int
+ kvSize int
+}
+
+func (p *DB) randHeight() (h int) {
+ const branching = 4
+ h = 1
+ for h < tMaxHeight && p.rnd.Int()%branching == 0 {
+ h++
+ }
+ return
+}
+
+// Must hold RW-lock if prev == true, as it uses the shared prevNode slice.
+func (p *DB) findGE(key []byte, prev bool) (int, bool) {
+ node := 0
+ h := p.maxHeight - 1
+ for {
+ next := p.nodeData[node+nNext+h]
+ cmp := 1
+ if next != 0 {
+ o := p.nodeData[next]
+ cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key)
+ }
+ if cmp < 0 {
+ // Keep searching in this list
+ node = next
+ } else {
+ if prev {
+ p.prevNode[h] = node
+ } else if cmp == 0 {
+ return next, true
+ }
+ if h == 0 {
+ return next, cmp == 0
+ }
+ h--
+ }
+ }
+}
+
+func (p *DB) findLT(key []byte) int {
+ node := 0
+ h := p.maxHeight - 1
+ for {
+ next := p.nodeData[node+nNext+h]
+ o := p.nodeData[next]
+ if next == 0 || p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) >= 0 {
+ if h == 0 {
+ break
+ }
+ h--
+ } else {
+ node = next
+ }
+ }
+ return node
+}
+
+func (p *DB) findLast() int {
+ node := 0
+ h := p.maxHeight - 1
+ for {
+ next := p.nodeData[node+nNext+h]
+ if next == 0 {
+ if h == 0 {
+ break
+ }
+ h--
+ } else {
+ node = next
+ }
+ }
+ return node
+}
+
+// Put sets the value for the given key. It overwrites any previous value
+// for that key; a DB is not a multi-map.
+//
+// It is safe to modify the contents of the arguments after Put returns.
+func (p *DB) Put(key []byte, value []byte) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ if node, exact := p.findGE(key, true); exact {
+ kvOffset := len(p.kvData)
+ p.kvData = append(p.kvData, key...)
+ p.kvData = append(p.kvData, value...)
+ p.nodeData[node] = kvOffset
+ m := p.nodeData[node+nVal]
+ p.nodeData[node+nVal] = len(value)
+ p.kvSize += len(value) - m
+ return nil
+ }
+
+ h := p.randHeight()
+ if h > p.maxHeight {
+ for i := p.maxHeight; i < h; i++ {
+ p.prevNode[i] = 0
+ }
+ p.maxHeight = h
+ }
+
+ kvOffset := len(p.kvData)
+ p.kvData = append(p.kvData, key...)
+ p.kvData = append(p.kvData, value...)
+ // Node
+ node := len(p.nodeData)
+ p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h)
+ for i, n := range p.prevNode[:h] {
+ m := n + nNext + i
+ p.nodeData = append(p.nodeData, p.nodeData[m])
+ p.nodeData[m] = node
+ }
+
+ p.kvSize += len(key) + len(value)
+ p.n++
+ return nil
+}
+
+// Delete deletes the value for the given key. It returns ErrNotFound if
+// the DB does not contain the key.
+//
+// It is safe to modify the contents of the arguments after Delete returns.
+func (p *DB) Delete(key []byte) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ node, exact := p.findGE(key, true)
+ if !exact {
+ return ErrNotFound
+ }
+
+ h := p.nodeData[node+nHeight]
+ for i, n := range p.prevNode[:h] {
+ m := n + nNext + i
+ p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i]
+ }
+
+ p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal]
+ p.n--
+ return nil
+}
+
+// Contains returns true if the given key is in the DB.
+//
+// It is safe to modify the contents of the arguments after Contains returns.
+func (p *DB) Contains(key []byte) bool {
+ p.mu.RLock()
+ _, exact := p.findGE(key, false)
+ p.mu.RUnlock()
+ return exact
+}
+
+// Get gets the value for the given key. It returns ErrNotFound if the
+// DB does not contain the key.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Get returns.
+func (p *DB) Get(key []byte) (value []byte, err error) {
+ p.mu.RLock()
+ if node, exact := p.findGE(key, false); exact {
+ o := p.nodeData[node] + p.nodeData[node+nKey]
+ value = p.kvData[o : o+p.nodeData[node+nVal]]
+ } else {
+ err = ErrNotFound
+ }
+ p.mu.RUnlock()
+ return
+}
+
+// Find finds key/value pair whose key is greater than or equal to the
+// given key. It returns ErrNotFound if the table doesn't contain
+// such pair.
+//
+// The caller should not modify the contents of the returned slice, but
+// it is safe to modify the contents of the argument after Find returns.
+func (p *DB) Find(key []byte) (rkey, value []byte, err error) {
+ p.mu.RLock()
+ if node, _ := p.findGE(key, false); node != 0 {
+ n := p.nodeData[node]
+ m := n + p.nodeData[node+nKey]
+ rkey = p.kvData[n:m]
+ value = p.kvData[m : m+p.nodeData[node+nVal]]
+ } else {
+ err = ErrNotFound
+ }
+ p.mu.RUnlock()
+ return
+}
+
+// NewIterator returns an iterator of the DB.
+// The returned iterator is not safe for concurrent use, but it is safe to use
+// multiple iterators concurrently, with each in a dedicated goroutine.
+// It is also safe to use an iterator concurrently with modifying its
+// underlying DB. However, the resultant key/value pairs are not guaranteed
+// to be a consistent snapshot of the DB at a particular point in time.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// DB. And a nil Range.Limit is treated as a key after all keys in
+// the DB.
+//
+// WARNING: Any slice returned by the iterator (e.g. a slice returned by calling
+// Iterator.Key() or Iterator.Value()) should not be modified
+// unless noted otherwise.
+//
+// The iterator must be released after use, by calling Release method.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (p *DB) NewIterator(slice *util.Range) iterator.Iterator {
+ return &dbIter{p: p, slice: slice}
+}
+
+// Capacity returns keys/values buffer capacity.
+func (p *DB) Capacity() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return cap(p.kvData)
+}
+
+// Size returns the sum of key and value lengths. Note that deleted
+// keys/values will not be accounted for, but they will still consume
+// the buffer, since the buffer is append-only.
+func (p *DB) Size() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return p.kvSize
+}
+
+// Free returns the free keys/values buffer space remaining before the buffer needs to grow.
+func (p *DB) Free() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return cap(p.kvData) - len(p.kvData)
+}
+
+// Len returns the number of entries in the DB.
+func (p *DB) Len() int {
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+ return p.n
+}
+
+// Reset resets the DB to its initial empty state, allowing the buffer to be reused.
+func (p *DB) Reset() {
+ p.mu.Lock()
+ p.rnd = rand.New(rand.NewSource(0xdeadbeef))
+ p.maxHeight = 1
+ p.n = 0
+ p.kvSize = 0
+ p.kvData = p.kvData[:0]
+ p.nodeData = p.nodeData[:nNext+tMaxHeight]
+ p.nodeData[nKV] = 0
+ p.nodeData[nKey] = 0
+ p.nodeData[nVal] = 0
+ p.nodeData[nHeight] = tMaxHeight
+ for n := 0; n < tMaxHeight; n++ {
+ p.nodeData[nNext+n] = 0
+ p.prevNode[n] = 0
+ }
+ p.mu.Unlock()
+}
+
+// New creates a new initialized in-memory key/value DB. The capacity
+// is the initial key/value buffer capacity. The capacity is advisory,
+// not enforced.
+//
+// This DB is append-only: deleting an entry removes its node but does not
+// reclaim the KV buffer.
+//
+// The returned DB instance is safe for concurrent use.
+func New(cmp comparer.BasicComparer, capacity int) *DB {
+ p := &DB{
+ cmp: cmp,
+ rnd: rand.New(rand.NewSource(0xdeadbeef)),
+ maxHeight: 1,
+ kvData: make([]byte, 0, capacity),
+ nodeData: make([]int, 4+tMaxHeight),
+ }
+ p.nodeData[nHeight] = tMaxHeight
+ return p
+}
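
A brief usage sketch of the memdb API defined above; the keys and the 4 KiB initial capacity are arbitrary:

package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/comparer"
	"github.com/syndtr/goleveldb/leveldb/memdb"
)

func main() {
	db := memdb.New(comparer.DefaultComparer, 4096)
	db.Put([]byte("alpha"), []byte("1"))
	db.Put([]byte("beta"), []byte("2"))

	if v, err := db.Get([]byte("alpha")); err == nil {
		fmt.Printf("alpha=%s\n", v)
	}

	// Iterate over the whole DB in key order; Release when done.
	it := db.NewIterator(nil)
	defer it.Release()
	for it.Next() {
		fmt.Printf("%s=%s\n", it.Key(), it.Value())
	}
}
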
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go
new file mode 100644
index 00000000..528b1642
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go
@@ -0,0 +1,697 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package opt provides sets of options used by LevelDB.
+package opt
+
+import (
+ "math"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+)
+
+const (
+ KiB = 1024
+ MiB = KiB * 1024
+ GiB = MiB * 1024
+)
+
+var (
+ DefaultBlockCacher = LRUCacher
+ DefaultBlockCacheCapacity = 8 * MiB
+ DefaultBlockRestartInterval = 16
+ DefaultBlockSize = 4 * KiB
+ DefaultCompactionExpandLimitFactor = 25
+ DefaultCompactionGPOverlapsFactor = 10
+ DefaultCompactionL0Trigger = 4
+ DefaultCompactionSourceLimitFactor = 1
+ DefaultCompactionTableSize = 2 * MiB
+ DefaultCompactionTableSizeMultiplier = 1.0
+ DefaultCompactionTotalSize = 10 * MiB
+ DefaultCompactionTotalSizeMultiplier = 10.0
+ DefaultCompressionType = SnappyCompression
+ DefaultIteratorSamplingRate = 1 * MiB
+ DefaultOpenFilesCacher = LRUCacher
+ DefaultOpenFilesCacheCapacity = 500
+ DefaultWriteBuffer = 4 * MiB
+ DefaultWriteL0PauseTrigger = 12
+ DefaultWriteL0SlowdownTrigger = 8
+)
+
+// Cacher is a caching algorithm.
+type Cacher interface {
+ New(capacity int) cache.Cacher
+}
+
+type CacherFunc struct {
+ NewFunc func(capacity int) cache.Cacher
+}
+
+func (f *CacherFunc) New(capacity int) cache.Cacher {
+ if f.NewFunc != nil {
+ return f.NewFunc(capacity)
+ }
+ return nil
+}
+
+func noCacher(int) cache.Cacher { return nil }
+
+var (
+ // LRUCacher is the LRU-cache algorithm.
+ LRUCacher = &CacherFunc{cache.NewLRU}
+
+ // NoCacher is the value to disable caching algorithm.
+ NoCacher = &CacherFunc{}
+)
+
+// Compression is the 'sorted table' block compression algorithm to use.
+type Compression uint
+
+func (c Compression) String() string {
+ switch c {
+ case DefaultCompression:
+ return "default"
+ case NoCompression:
+ return "none"
+ case SnappyCompression:
+ return "snappy"
+ }
+ return "invalid"
+}
+
+const (
+ DefaultCompression Compression = iota
+ NoCompression
+ SnappyCompression
+ nCompression
+)
+
+// Strict is the DB 'strict level'.
+type Strict uint
+
+const (
+ // If present then a corrupted or invalid chunk or block in manifest
+ // journal will cause an error instead of being dropped.
+ // This will prevent a database with a corrupted manifest from being opened.
+ StrictManifest Strict = 1 << iota
+
+ // If present then journal chunk checksum will be verified.
+ StrictJournalChecksum
+
+ // If present then a corrupted or invalid chunk or block in journal
+ // will cause an error instead of being dropped.
+ // This will prevent a database with a corrupted journal from being opened.
+ StrictJournal
+
+ // If present then 'sorted table' block checksum will be verified.
+ // This has effect on both 'read operation' and compaction.
+ StrictBlockChecksum
+
+ // If present then a corrupted 'sorted table' will fail compaction.
+ // The database will enter read-only mode.
+ StrictCompaction
+
+ // If present then a corrupted 'sorted table' will halt 'read operation'.
+ StrictReader
+
+ // If present then leveldb.Recover will drop corrupted 'sorted table'.
+ StrictRecovery
+
+ // This is only applicable to ReadOptions; if present then this ReadOptions'
+ // 'strict level' will override the global one.
+ StrictOverride
+
+ // StrictAll enables all strict flags.
+ StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery
+
+ // DefaultStrict is the default set of strict flags. Specifying any strict flags
+ // will override the default strict flags as a whole (i.e. not OR'ed).
+ DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader
+
+ // NoStrict disables all strict flags. It overrides the default strict flags.
+ NoStrict = ^StrictAll
+)
+
+// Options holds the optional parameters for the DB at large.
+type Options struct {
+ // AltFilters defines one or more 'alternative filters'.
+ // 'alternative filters' will be used during reads if a filter block
+ // does not match with the 'effective filter'.
+ //
+ // The default value is nil
+ AltFilters []filter.Filter
+
+ // BlockCacher provides cache algorithm for LevelDB 'sorted table' block caching.
+ // Specify NoCacher to disable caching algorithm.
+ //
+ // The default value is LRUCacher.
+ BlockCacher Cacher
+
+ // BlockCacheCapacity defines the capacity of the 'sorted table' block caching.
+ // Use -1 for zero; this has the same effect as specifying NoCacher for BlockCacher.
+ //
+ // The default value is 8MiB.
+ BlockCacheCapacity int
+
+ // BlockCacheEvictRemoved enables forced eviction of cached blocks belonging
+ // to removed 'sorted tables'.
+ //
+ // The default value is false.
+ BlockCacheEvictRemoved bool
+
+ // BlockRestartInterval is the number of keys between restart points for
+ // delta encoding of keys.
+ //
+ // The default value is 16.
+ BlockRestartInterval int
+
+ // BlockSize is the minimum uncompressed size in bytes of each 'sorted table'
+ // block.
+ //
+ // The default value is 4KiB.
+ BlockSize int
+
+ // CompactionExpandLimitFactor limits compaction size after expanded.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 25.
+ CompactionExpandLimitFactor int
+
+ // CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a
+ // single 'sorted table' generates.
+ // This will be multiplied by table size limit at grandparent level.
+ //
+ // The default value is 10.
+ CompactionGPOverlapsFactor int
+
+ // CompactionL0Trigger defines number of 'sorted table' at level-0 that will
+ // trigger compaction.
+ //
+ // The default value is 4.
+ CompactionL0Trigger int
+
+ // CompactionSourceLimitFactor limits compaction source size. This doesn't apply to
+ // level-0.
+ // This will be multiplied by table size limit at compaction target level.
+ //
+ // The default value is 1.
+ CompactionSourceLimitFactor int
+
+ // CompactionTableSize limits size of 'sorted table' that compaction generates.
+ // The limits for each level will be calculated as:
+ // CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
+ // The multiplier for each level can also be fine-tuned using CompactionTableSizeMultiplierPerLevel.
+ //
+ // The default value is 2MiB.
+ CompactionTableSize int
+
+ // CompactionTableSizeMultiplier defines multiplier for CompactionTableSize.
+ //
+ // The default value is 1.
+ CompactionTableSizeMultiplier float64
+
+ // CompactionTableSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTableSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTableSizeMultiplierPerLevel []float64
+
+ // CompactionTotalSize limits total size of 'sorted table' for each level.
+ // The limits for each level will be calculated as:
+ // CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
+ // The multiplier for each level can also be fine-tuned using
+ // CompactionTotalSizeMultiplierPerLevel.
+ //
+ // The default value is 10MiB.
+ CompactionTotalSize int
+
+ // CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize.
+ //
+ // The default value is 10.
+ CompactionTotalSizeMultiplier float64
+
+ // CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for
+ // CompactionTotalSize.
+ // Use zero to skip a level.
+ //
+ // The default value is nil.
+ CompactionTotalSizeMultiplierPerLevel []float64
+
+ // Comparer defines a total ordering over the space of []byte keys: a 'less
+ // than' relationship. The same comparison algorithm must be used for reads
+ // and writes over the lifetime of the DB.
+ //
+ // The default value uses the same ordering as bytes.Compare.
+ Comparer comparer.Comparer
+
+ // Compression defines the 'sorted table' block compression to use.
+ //
+ // The default value (DefaultCompression) uses snappy compression.
+ Compression Compression
+
+ // DisableBufferPool disables use of util.BufferPool functionality.
+ //
+ // The default value is false.
+ DisableBufferPool bool
+
+ // DisableBlockCache disables use of cache.Cache functionality on
+ // 'sorted table' blocks.
+ //
+ // The default value is false.
+ DisableBlockCache bool
+
+ // DisableCompactionBackoff disables compaction retry backoff.
+ //
+ // The default value is false.
+ DisableCompactionBackoff bool
+
+ // DisableLargeBatchTransaction allows disabling switch-to-transaction mode
+ // for large batch writes. By default, batch writes larger than WriteBuffer
+ // use a transaction.
+ //
+ // The default is false.
+ DisableLargeBatchTransaction bool
+
+ // ErrorIfExist defines whether an error should be returned if the DB already
+ // exists.
+ //
+ // The default value is false.
+ ErrorIfExist bool
+
+ // ErrorIfMissing defines whether an error should be returned if the DB is
+ // missing. If false then the database will be created if missing; otherwise
+ // an error will be returned.
+ //
+ // The default value is false.
+ ErrorIfMissing bool
+
+ // Filter defines an 'effective filter' to use. An 'effective filter',
+ // if defined, will be used to generate per-table filter blocks.
+ // The filter name will be stored on disk.
+ // During reads LevelDB will try to find a matching filter from the
+ // 'effective filter' and 'alternative filters'.
+ //
+ // Filter can be changed after a DB has been created. It is recommended
+ // to put the old filter into the 'alternative filters' to mitigate the
+ // lack of a filter during the transition period.
+ //
+ // A filter is used to reduce disk reads when looking for a specific key.
+ //
+ // The default value is nil.
+ Filter filter.Filter
+
+ // IteratorSamplingRate defines approximate gap (in bytes) between read
+ // sampling of an iterator. The samples will be used to determine when
+ // compaction should be triggered.
+ //
+ // The default is 1MiB.
+ IteratorSamplingRate int
+
+ // NoSync allows completely disabling fsync.
+ //
+ // The default is false.
+ NoSync bool
+
+ // NoWriteMerge allows disabling write merge.
+ //
+ // The default is false.
+ NoWriteMerge bool
+
+ // OpenFilesCacher provides cache algorithm for open files caching.
+ // Specify NoCacher to disable caching algorithm.
+ //
+ // The default value is LRUCacher.
+ OpenFilesCacher Cacher
+
+ // OpenFilesCacheCapacity defines the capacity of the open files caching.
+ // Use -1 for zero; this has the same effect as specifying NoCacher for OpenFilesCacher.
+ //
+ // The default value is 500.
+ OpenFilesCacheCapacity int
+
+ // If true then opens DB in read-only mode.
+ //
+ // The default value is false.
+ ReadOnly bool
+
+ // Strict defines the DB strict level.
+ Strict Strict
+
+ // WriteBuffer defines the maximum size of a 'memdb' before it is flushed to
+ // a 'sorted table'. A 'memdb' is an in-memory DB backed by an on-disk
+ // unsorted journal.
+ //
+ // LevelDB may hold up to two 'memdb' instances at the same time.
+ //
+ // The default value is 4MiB.
+ WriteBuffer int
+
+ // WriteL0PauseTrigger defines the number of 'sorted tables' at level-0 that
+ // will pause writes.
+ //
+ // The default value is 12.
+ WriteL0PauseTrigger int
+
+ // WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that
+ // will trigger write slowdown.
+ //
+ // The default value is 8.
+ WriteL0SlowdownTrigger int
+}
+
+func (o *Options) GetAltFilters() []filter.Filter {
+ if o == nil {
+ return nil
+ }
+ return o.AltFilters
+}
+
+func (o *Options) GetBlockCacher() Cacher {
+ if o == nil || o.BlockCacher == nil {
+ return DefaultBlockCacher
+ } else if o.BlockCacher == NoCacher {
+ return nil
+ }
+ return o.BlockCacher
+}
+
+func (o *Options) GetBlockCacheCapacity() int {
+ if o == nil || o.BlockCacheCapacity == 0 {
+ return DefaultBlockCacheCapacity
+ } else if o.BlockCacheCapacity < 0 {
+ return 0
+ }
+ return o.BlockCacheCapacity
+}
+
+func (o *Options) GetBlockCacheEvictRemoved() bool {
+ if o == nil {
+ return false
+ }
+ return o.BlockCacheEvictRemoved
+}
+
+func (o *Options) GetBlockRestartInterval() int {
+ if o == nil || o.BlockRestartInterval <= 0 {
+ return DefaultBlockRestartInterval
+ }
+ return o.BlockRestartInterval
+}
+
+func (o *Options) GetBlockSize() int {
+ if o == nil || o.BlockSize <= 0 {
+ return DefaultBlockSize
+ }
+ return o.BlockSize
+}
+
+func (o *Options) GetCompactionExpandLimit(level int) int {
+ factor := DefaultCompactionExpandLimitFactor
+ if o != nil && o.CompactionExpandLimitFactor > 0 {
+ factor = o.CompactionExpandLimitFactor
+ }
+ return o.GetCompactionTableSize(level+1) * factor
+}
+
+func (o *Options) GetCompactionGPOverlaps(level int) int {
+ factor := DefaultCompactionGPOverlapsFactor
+ if o != nil && o.CompactionGPOverlapsFactor > 0 {
+ factor = o.CompactionGPOverlapsFactor
+ }
+ return o.GetCompactionTableSize(level+2) * factor
+}
+
+func (o *Options) GetCompactionL0Trigger() int {
+ if o == nil || o.CompactionL0Trigger == 0 {
+ return DefaultCompactionL0Trigger
+ }
+ return o.CompactionL0Trigger
+}
+
+func (o *Options) GetCompactionSourceLimit(level int) int {
+ factor := DefaultCompactionSourceLimitFactor
+ if o != nil && o.CompactionSourceLimitFactor > 0 {
+ factor = o.CompactionSourceLimitFactor
+ }
+ return o.GetCompactionTableSize(level+1) * factor
+}
+
+func (o *Options) GetCompactionTableSize(level int) int {
+ var (
+ base = DefaultCompactionTableSize
+ mult float64
+ )
+ if o != nil {
+ if o.CompactionTableSize > 0 {
+ base = o.CompactionTableSize
+ }
+ if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
+ mult = o.CompactionTableSizeMultiplierPerLevel[level]
+ } else if o.CompactionTableSizeMultiplier > 0 {
+ mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level))
+ }
+ }
+ if mult == 0 {
+ mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level))
+ }
+ return int(float64(base) * mult)
+}
+
+func (o *Options) GetCompactionTotalSize(level int) int64 {
+ var (
+ base = DefaultCompactionTotalSize
+ mult float64
+ )
+ if o != nil {
+ if o.CompactionTotalSize > 0 {
+ base = o.CompactionTotalSize
+ }
+ if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
+ mult = o.CompactionTotalSizeMultiplierPerLevel[level]
+ } else if o.CompactionTotalSizeMultiplier > 0 {
+ mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level))
+ }
+ }
+ if mult == 0 {
+ mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level))
+ }
+ return int64(float64(base) * mult)
+}
+
+func (o *Options) GetComparer() comparer.Comparer {
+ if o == nil || o.Comparer == nil {
+ return comparer.DefaultComparer
+ }
+ return o.Comparer
+}
+
+func (o *Options) GetCompression() Compression {
+ if o == nil || o.Compression <= DefaultCompression || o.Compression >= nCompression {
+ return DefaultCompressionType
+ }
+ return o.Compression
+}
+
+func (o *Options) GetDisableBufferPool() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableBufferPool
+}
+
+func (o *Options) GetDisableBlockCache() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableBlockCache
+}
+
+func (o *Options) GetDisableCompactionBackoff() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableCompactionBackoff
+}
+
+func (o *Options) GetDisableLargeBatchTransaction() bool {
+ if o == nil {
+ return false
+ }
+ return o.DisableLargeBatchTransaction
+}
+
+func (o *Options) GetErrorIfExist() bool {
+ if o == nil {
+ return false
+ }
+ return o.ErrorIfExist
+}
+
+func (o *Options) GetErrorIfMissing() bool {
+ if o == nil {
+ return false
+ }
+ return o.ErrorIfMissing
+}
+
+func (o *Options) GetFilter() filter.Filter {
+ if o == nil {
+ return nil
+ }
+ return o.Filter
+}
+
+func (o *Options) GetIteratorSamplingRate() int {
+ if o == nil || o.IteratorSamplingRate <= 0 {
+ return DefaultIteratorSamplingRate
+ }
+ return o.IteratorSamplingRate
+}
+
+func (o *Options) GetNoSync() bool {
+ if o == nil {
+ return false
+ }
+ return o.NoSync
+}
+
+func (o *Options) GetNoWriteMerge() bool {
+ if o == nil {
+ return false
+ }
+ return o.NoWriteMerge
+}
+
+func (o *Options) GetOpenFilesCacher() Cacher {
+ if o == nil || o.OpenFilesCacher == nil {
+ return DefaultOpenFilesCacher
+ }
+ if o.OpenFilesCacher == NoCacher {
+ return nil
+ }
+ return o.OpenFilesCacher
+}
+
+func (o *Options) GetOpenFilesCacheCapacity() int {
+ if o == nil || o.OpenFilesCacheCapacity == 0 {
+ return DefaultOpenFilesCacheCapacity
+ } else if o.OpenFilesCacheCapacity < 0 {
+ return 0
+ }
+ return o.OpenFilesCacheCapacity
+}
+
+func (o *Options) GetReadOnly() bool {
+ if o == nil {
+ return false
+ }
+ return o.ReadOnly
+}
+
+func (o *Options) GetStrict(strict Strict) bool {
+ if o == nil || o.Strict == 0 {
+ return DefaultStrict&strict != 0
+ }
+ return o.Strict&strict != 0
+}
+
+func (o *Options) GetWriteBuffer() int {
+ if o == nil || o.WriteBuffer <= 0 {
+ return DefaultWriteBuffer
+ }
+ return o.WriteBuffer
+}
+
+func (o *Options) GetWriteL0PauseTrigger() int {
+ if o == nil || o.WriteL0PauseTrigger == 0 {
+ return DefaultWriteL0PauseTrigger
+ }
+ return o.WriteL0PauseTrigger
+}
+
+func (o *Options) GetWriteL0SlowdownTrigger() int {
+ if o == nil || o.WriteL0SlowdownTrigger == 0 {
+ return DefaultWriteL0SlowdownTrigger
+ }
+ return o.WriteL0SlowdownTrigger
+}
+
+// ReadOptions holds the optional parameters for 'read operation'. The
+// 'read operation' includes Get, Find and NewIterator.
+type ReadOptions struct {
+ // DontFillCache defines whether block reads for this 'read operation'
+ // should be cached. If false then the block will be cached. This does
+ // not affect already cached blocks.
+ //
+ // The default value is false.
+ DontFillCache bool
+
+ // Strict will be OR'ed with global DB 'strict level' unless StrictOverride
+ // is present. Currently only StrictReader has an effect here.
+ Strict Strict
+}
+
+func (ro *ReadOptions) GetDontFillCache() bool {
+ if ro == nil {
+ return false
+ }
+ return ro.DontFillCache
+}
+
+func (ro *ReadOptions) GetStrict(strict Strict) bool {
+ if ro == nil {
+ return false
+ }
+ return ro.Strict&strict != 0
+}
+
+// WriteOptions holds the optional parameters for 'write operation'. The
+// 'write operation' includes Write, Put and Delete.
+type WriteOptions struct {
+ // NoWriteMerge allows disabling write merge.
+ //
+ // The default is false.
+ NoWriteMerge bool
+
+ // Sync is whether to sync underlying writes from the OS buffer cache
+ // through to actual disk, if applicable. Setting Sync can result in
+ // slower writes.
+ //
+ // If false, and the machine crashes, then some recent writes may be lost.
+ // Note that if it is just the process that crashes (and the machine does
+ // not) then no writes will be lost.
+ //
+ // In other words, Sync being false has the same semantics as a write
+ // system call. Sync being true means write followed by fsync.
+ //
+ // The default value is false.
+ Sync bool
+}
+
+func (wo *WriteOptions) GetNoWriteMerge() bool {
+ if wo == nil {
+ return false
+ }
+ return wo.NoWriteMerge
+}
+
+func (wo *WriteOptions) GetSync() bool {
+ if wo == nil {
+ return false
+ }
+ return wo.Sync
+}
+
+func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool {
+ if ro.GetStrict(StrictOverride) {
+ return ro.GetStrict(strict)
+ } else {
+ return o.GetStrict(strict) || ro.GetStrict(strict)
+ }
+}
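
To illustrate how the getters above resolve defaults, a short sketch: zero-valued fields (and even a nil *Options) fall back to the package defaults, while set fields are returned as-is. The particular capacities chosen here are arbitrary:

package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/opt"
)

func main() {
	o := &opt.Options{
		BlockCacheCapacity: 16 * opt.MiB,
		Compression:        opt.NoCompression,
	}

	fmt.Println(o.GetBlockCacheCapacity()) // 16 MiB, as set
	fmt.Println(o.GetWriteBuffer())        // 4 MiB, the package default
	fmt.Println(o.GetCompression())        // none

	// A nil *Options is valid: every getter checks for nil and returns defaults.
	var none *opt.Options
	fmt.Println(none.GetCompression()) // snappy
}
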
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/options.go
new file mode 100644
index 00000000..b072b1ac
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/options.go
@@ -0,0 +1,107 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+)
+
+func dupOptions(o *opt.Options) *opt.Options {
+ newo := &opt.Options{}
+ if o != nil {
+ *newo = *o
+ }
+ if newo.Strict == 0 {
+ newo.Strict = opt.DefaultStrict
+ }
+ return newo
+}
+
+func (s *session) setOptions(o *opt.Options) {
+ no := dupOptions(o)
+ // Alternative filters.
+ if filters := o.GetAltFilters(); len(filters) > 0 {
+ no.AltFilters = make([]filter.Filter, len(filters))
+ for i, filter := range filters {
+ no.AltFilters[i] = &iFilter{filter}
+ }
+ }
+ // Comparer.
+ s.icmp = &iComparer{o.GetComparer()}
+ no.Comparer = s.icmp
+ // Filter.
+ if filter := o.GetFilter(); filter != nil {
+ no.Filter = &iFilter{filter}
+ }
+
+ s.o = &cachedOptions{Options: no}
+ s.o.cache()
+}
+
+const optCachedLevel = 7
+
+type cachedOptions struct {
+ *opt.Options
+
+ compactionExpandLimit []int
+ compactionGPOverlaps []int
+ compactionSourceLimit []int
+ compactionTableSize []int
+ compactionTotalSize []int64
+}
+
+func (co *cachedOptions) cache() {
+ co.compactionExpandLimit = make([]int, optCachedLevel)
+ co.compactionGPOverlaps = make([]int, optCachedLevel)
+ co.compactionSourceLimit = make([]int, optCachedLevel)
+ co.compactionTableSize = make([]int, optCachedLevel)
+ co.compactionTotalSize = make([]int64, optCachedLevel)
+
+ for level := 0; level < optCachedLevel; level++ {
+ co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level)
+ co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level)
+ co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level)
+ co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level)
+ co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level)
+ }
+}
+
+func (co *cachedOptions) GetCompactionExpandLimit(level int) int {
+ if level < optCachedLevel {
+ return co.compactionExpandLimit[level]
+ }
+ return co.Options.GetCompactionExpandLimit(level)
+}
+
+func (co *cachedOptions) GetCompactionGPOverlaps(level int) int {
+ if level < optCachedLevel {
+ return co.compactionGPOverlaps[level]
+ }
+ return co.Options.GetCompactionGPOverlaps(level)
+}
+
+func (co *cachedOptions) GetCompactionSourceLimit(level int) int {
+ if level < optCachedLevel {
+ return co.compactionSourceLimit[level]
+ }
+ return co.Options.GetCompactionSourceLimit(level)
+}
+
+func (co *cachedOptions) GetCompactionTableSize(level int) int {
+ if level < optCachedLevel {
+ return co.compactionTableSize[level]
+ }
+ return co.Options.GetCompactionTableSize(level)
+}
+
+func (co *cachedOptions) GetCompactionTotalSize(level int) int64 {
+ if level < optCachedLevel {
+ return co.compactionTotalSize[level]
+ }
+ return co.Options.GetCompactionTotalSize(level)
+}
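
The cachedOptions type above simply precomputes the per-level results of the math.Pow-based getters for levels 0..6, so hot paths do a slice lookup instead of recomputing. A standalone sketch of the same schedule using the package defaults (10 MiB total size, multiplier 10):

package main

import (
	"fmt"
	"math"
)

func main() {
	const base = 10 * 1024 * 1024 // DefaultCompactionTotalSize
	const mult = 10.0             // DefaultCompactionTotalSizeMultiplier

	// Precompute once per level, as cachedOptions.cache does for levels 0..6.
	totals := make([]int64, 7)
	for level := range totals {
		totals[level] = int64(float64(base) * math.Pow(mult, float64(level)))
	}
	fmt.Println(totals) // 10 MiB, 100 MiB, 1000 MiB, ...
}
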
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session.go b/vendor/github.com/syndtr/goleveldb/leveldb/session.go
new file mode 100644
index 00000000..3f391f93
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session.go
@@ -0,0 +1,210 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "sync"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// ErrManifestCorrupted records manifest corruption. This error will be
+// wrapped with errors.ErrCorrupted.
+type ErrManifestCorrupted struct {
+ Field string
+ Reason string
+}
+
+func (e *ErrManifestCorrupted) Error() string {
+ return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason)
+}
+
+func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error {
+ return errors.NewErrCorrupted(fd, &ErrManifestCorrupted{field, reason})
+}
+
+// session represents a persistent database session.
+type session struct {
+ // Need 64-bit alignment.
+ stNextFileNum int64 // current unused file number
+ stJournalNum int64 // current journal file number; need external synchronization
+ stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb
+ stTempFileNum int64
+ stSeqNum uint64 // last mem compacted seq; need external synchronization
+
+ stor *iStorage
+ storLock storage.Locker
+ o *cachedOptions
+ icmp *iComparer
+ tops *tOps
+ fileRef map[int64]int
+
+ manifest *journal.Writer
+ manifestWriter storage.Writer
+ manifestFd storage.FileDesc
+
+ stCompPtrs []internalKey // compaction pointers; need external synchronization
+ stVersion *version // current version
+ vmu sync.Mutex
+}
+
+// Creates new initialized session instance.
+func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
+ if stor == nil {
+ return nil, os.ErrInvalid
+ }
+ storLock, err := stor.Lock()
+ if err != nil {
+ return
+ }
+ s = &session{
+ stor: newIStorage(stor),
+ storLock: storLock,
+ fileRef: make(map[int64]int),
+ }
+ s.setOptions(o)
+ s.tops = newTableOps(s)
+ s.setVersion(newVersion(s))
+ s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed")
+ return
+}
+
+// Close session.
+func (s *session) close() {
+ s.tops.close()
+ if s.manifest != nil {
+ s.manifest.Close()
+ }
+ if s.manifestWriter != nil {
+ s.manifestWriter.Close()
+ }
+ s.manifest = nil
+ s.manifestWriter = nil
+ s.setVersion(&version{s: s, closing: true})
+}
+
+// Release session lock.
+func (s *session) release() {
+ s.storLock.Unlock()
+}
+
+// Create a new database session; need external synchronization.
+func (s *session) create() error {
+ // create manifest
+ return s.newManifest(nil, nil)
+}
+
+// Recover a database session; need external synchronization.
+func (s *session) recover() (err error) {
+ defer func() {
+ if os.IsNotExist(err) {
+ // Don't return os.ErrNotExist if the underlying storage contains
+ // other files that belong to LevelDB, so the DB won't get trashed.
+ if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 {
+ err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
+ }
+ }
+ }()
+
+ fd, err := s.stor.GetMeta()
+ if err != nil {
+ return
+ }
+
+ reader, err := s.stor.Open(fd)
+ if err != nil {
+ return
+ }
+ defer reader.Close()
+
+ var (
+ // Options.
+ strict = s.o.GetStrict(opt.StrictManifest)
+
+ jr = journal.NewReader(reader, dropper{s, fd}, strict, true)
+ rec = &sessionRecord{}
+ staging = s.stVersion.newStaging()
+ )
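+ // Replay every record in the manifest journal: compaction pointers are
+ // applied to the session, while table additions and deletions are
+ // committed to a version staging area. Corrupted records are skipped
+ // unless strict manifest checking is enabled.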
+ for {
+ var r io.Reader
+ r, err = jr.Next()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ break
+ }
+ return errors.SetFd(err, fd)
+ }
+
+ err = rec.decode(r)
+ if err == nil {
+ // save compact pointers
+ for _, r := range rec.compPtrs {
+ s.setCompPtr(r.level, internalKey(r.ikey))
+ }
+ // commit record to version staging
+ staging.commit(rec)
+ } else {
+ err = errors.SetFd(err, fd)
+ if strict || !errors.IsCorrupted(err) {
+ return
+ }
+ s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd))
+ }
+ rec.resetCompPtrs()
+ rec.resetAddedTables()
+ rec.resetDeletedTables()
+ }
+
+ switch {
+ case !rec.has(recComparer):
+ return newErrManifestCorrupted(fd, "comparer", "missing")
+ case rec.comparer != s.icmp.uName():
+ return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
+ case !rec.has(recNextFileNum):
+ return newErrManifestCorrupted(fd, "next-file-num", "missing")
+ case !rec.has(recJournalNum):
+ return newErrManifestCorrupted(fd, "journal-file-num", "missing")
+ case !rec.has(recSeqNum):
+ return newErrManifestCorrupted(fd, "seq-num", "missing")
+ }
+
+ s.manifestFd = fd
+ s.setVersion(staging.finish())
+ s.setNextFileNum(rec.nextFileNum)
+ s.recordCommited(rec)
+ return nil
+}
+
+// Commit session; need external synchronization.
+func (s *session) commit(r *sessionRecord) (err error) {
+ v := s.version()
+ defer v.release()
+
+ // spawn new version based on current version
+ nv := v.spawn(r)
+
+ if s.manifest == nil {
+ // manifest journal writer not yet created, create one
+ err = s.newManifest(r, nv)
+ } else {
+ err = s.flushManifest(r)
+ }
+
+ // Finally, apply the new version if no error arose.
+ if err == nil {
+ s.setVersion(nv)
+ }
+
+ return
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go
new file mode 100644
index 00000000..089cd00b
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go
@@ -0,0 +1,302 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/memdb"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+)
+
+func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int {
+ v := s.version()
+ defer v.release()
+ return v.pickMemdbLevel(umin, umax, maxLevel)
+}
+
+func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) {
+ // Create sorted table.
+ iter := mdb.NewIterator(nil)
+ defer iter.Release()
+ t, n, err := s.tops.createFrom(iter)
+ if err != nil {
+ return 0, err
+ }
+
+ // Picking a level other than zero can cause compaction issues with
+ // large bulk inserts and deletes on a strictly incrementing key-space:
+ // small deletion markers get trapped at a lower level while key/value
+ // entries keep growing at a higher level. Since the key-space is
+ // strictly incrementing it does not overlap with higher levels, so the
+ // maximum possible level is always picked and the overlapping deletion
+ // markers are pushed into a lower level.
+ // See: https://github.com/syndtr/goleveldb/issues/127.
+ flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel)
+ rec.addTableFile(flushLevel, t)
+
+ s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
+ return flushLevel, nil
+}
+
+// Pick a compaction based on current state; need external synchronization.
+func (s *session) pickCompaction() *compaction {
+ v := s.version()
+
+ var sourceLevel int
+ var t0 tFiles
+ if v.cScore >= 1 {
+ sourceLevel = v.cLevel
+ cptr := s.getCompPtr(sourceLevel)
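+ // Resume from the per-level compaction pointer: pick the first table
+ // whose largest key lies past it, wrapping back to the first table once
+ // the pointer has passed every table on the level.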
+ tables := v.levels[sourceLevel]
+ for _, t := range tables {
+ if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
+ t0 = append(t0, t)
+ break
+ }
+ }
+ if len(t0) == 0 {
+ t0 = append(t0, tables[0])
+ }
+ } else {
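+ // Seek-triggered compaction: cSeek records a table that has used up
+ // its seek allowance (see consumeSeek in table.go).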
+ if p := atomic.LoadPointer(&v.cSeek); p != nil {
+ ts := (*tSet)(p)
+ sourceLevel = ts.level
+ t0 = append(t0, ts.table)
+ } else {
+ v.release()
+ return nil
+ }
+ }
+
+ return newCompaction(s, v, sourceLevel, t0)
+}
+
+// Create compaction from given level and range; need external synchronization.
+func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction {
+ v := s.version()
+
+ if sourceLevel >= len(v.levels) {
+ v.release()
+ return nil
+ }
+
+ t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0)
+ if len(t0) == 0 {
+ v.release()
+ return nil
+ }
+
+ // Avoid compacting too much in one shot in case the range is large.
+ // But we cannot do this for level-0 since level-0 files can overlap
+ // and we must not pick one file and drop another older file if the
+ // two files overlap.
+ if !noLimit && sourceLevel > 0 {
+ limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel))
+ total := int64(0)
+ for i, t := range t0 {
+ total += t.size
+ if total >= limit {
+ s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
+ t0 = t0[:i+1]
+ break
+ }
+ }
+ }
+
+ return newCompaction(s, v, sourceLevel, t0)
+}
+
+func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction {
+ c := &compaction{
+ s: s,
+ v: v,
+ sourceLevel: sourceLevel,
+ levels: [2]tFiles{t0, nil},
+ maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)),
+ tPtrs: make([]int, len(v.levels)),
+ }
+ c.expand()
+ c.save()
+ return c
+}
+
+// compaction represents the state of a single compaction.
+type compaction struct {
+ s *session
+ v *version
+
+ sourceLevel int
+ levels [2]tFiles
+ maxGPOverlaps int64
+
+ gp tFiles
+ gpi int
+ seenKey bool
+ gpOverlappedBytes int64
+ imin, imax internalKey
+ tPtrs []int
+ released bool
+
+ snapGPI int
+ snapSeenKey bool
+ snapGPOverlappedBytes int64
+ snapTPtrs []int
+}
+
+func (c *compaction) save() {
+ c.snapGPI = c.gpi
+ c.snapSeenKey = c.seenKey
+ c.snapGPOverlappedBytes = c.gpOverlappedBytes
+ c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
+}
+
+func (c *compaction) restore() {
+ c.gpi = c.snapGPI
+ c.seenKey = c.snapSeenKey
+ c.gpOverlappedBytes = c.snapGPOverlappedBytes
+ c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
+}
+
+func (c *compaction) release() {
+ if !c.released {
+ c.released = true
+ c.v.release()
+ }
+}
+
+// Expand compacted tables; need external synchronization.
+func (c *compaction) expand() {
+ limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel))
+ vt0 := c.v.levels[c.sourceLevel]
+ vt1 := tFiles{}
+ if level := c.sourceLevel + 1; level < len(c.v.levels) {
+ vt1 = c.v.levels[level]
+ }
+
+ t0, t1 := c.levels[0], c.levels[1]
+ imin, imax := t0.getRange(c.s.icmp)
+ // We expand t0 here just in case a ukey hops across tables.
+ t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0)
+ if len(t0) != len(c.levels[0]) {
+ imin, imax = t0.getRange(c.s.icmp)
+ }
+ t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
+ // Get entire range covered by compaction.
+ amin, amax := append(t0, t1...).getRange(c.s.icmp)
+
+ // See if we can grow the number of inputs in "sourceLevel" without
+ // changing the number of "sourceLevel+1" files we pick up.
+ if len(t1) > 0 {
+ exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0)
+ if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
+ xmin, xmax := exp0.getRange(c.s.icmp)
+ exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
+ if len(exp1) == len(t1) {
+ c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
+ c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
+ len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
+ imin, imax = xmin, xmax
+ t0, t1 = exp0, exp1
+ amin, amax = append(t0, t1...).getRange(c.s.icmp)
+ }
+ }
+ }
+
+ // Compute the set of grandparent files that overlap this compaction
+ // (parent == sourceLevel+1; grandparent == sourceLevel+2)
+ if level := c.sourceLevel + 2; level < len(c.v.levels) {
+ c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
+ }
+
+ c.levels[0], c.levels[1] = t0, t1
+ c.imin, c.imax = imin, imax
+}
+
+// Check whether compaction is trivial.
+func (c *compaction) trivial() bool {
+ return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
+}
+
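+// baseLevelForKey reports whether ukey cannot occur in any level below the
+// compaction output (sourceLevel+2 and deeper). tPtrs caches per-level search
+// positions, which is valid because keys are consulted in ascending order.
+// A true result lets the compaction drop deletion markers for ukey.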
+func (c *compaction) baseLevelForKey(ukey []byte) bool {
+ for level := c.sourceLevel + 2; level < len(c.v.levels); level++ {
+ tables := c.v.levels[level]
+ for c.tPtrs[level] < len(tables) {
+ t := tables[c.tPtrs[level]]
+ if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
+ // We've advanced far enough.
+ if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
+ // Key falls in this file's range, so definitely not base level.
+ return false
+ }
+ break
+ }
+ c.tPtrs[level]++
+ }
+ }
+ return true
+}
+
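+// shouldStopBefore reports whether the current output table should be closed
+// before ikey is appended, so that no single output table overlaps more than
+// maxGPOverlaps bytes of the grandparent level and future compactions of that
+// table stay cheap.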
+func (c *compaction) shouldStopBefore(ikey internalKey) bool {
+ for ; c.gpi < len(c.gp); c.gpi++ {
+ gp := c.gp[c.gpi]
+ if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
+ break
+ }
+ if c.seenKey {
+ c.gpOverlappedBytes += gp.size
+ }
+ }
+ c.seenKey = true
+
+ if c.gpOverlappedBytes > c.maxGPOverlaps {
+ // Too much overlap for current output; start new output.
+ c.gpOverlappedBytes = 0
+ return true
+ }
+ return false
+}
+
+// Creates an iterator.
+func (c *compaction) newIterator() iterator.Iterator {
+ // Creates iterator slice.
+ icap := len(c.levels)
+ if c.sourceLevel == 0 {
+ // Special case for level-0.
+ icap = len(c.levels[0]) + 1
+ }
+ its := make([]iterator.Iterator, 0, icap)
+
+ // Options.
+ ro := &opt.ReadOptions{
+ DontFillCache: true,
+ Strict: opt.StrictOverride,
+ }
+ strict := c.s.o.GetStrict(opt.StrictCompaction)
+ if strict {
+ ro.Strict |= opt.StrictReader
+ }
+
+ for i, tables := range c.levels {
+ if len(tables) == 0 {
+ continue
+ }
+
+ // Level-0 tables are not sorted and may overlap each other.
+ if c.sourceLevel+i == 0 {
+ for _, t := range tables {
+ its = append(its, c.s.tops.newIterator(t, nil, ro))
+ }
+ } else {
+ it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
+ its = append(its, it)
+ }
+ }
+
+ return iterator.NewMergedIterator(its, c.s.icmp, strict)
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go
new file mode 100644
index 00000000..854e1aa6
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go
@@ -0,0 +1,323 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "bufio"
+ "encoding/binary"
+ "io"
+ "strings"
+
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+type byteReader interface {
+ io.Reader
+ io.ByteReader
+}
+
+// These numbers are written to disk and should not be changed.
+const (
+ recComparer = 1
+ recJournalNum = 2
+ recNextFileNum = 3
+ recSeqNum = 4
+ recCompPtr = 5
+ recDelTable = 6
+ recAddTable = 7
+ // 8 was used for large value refs
+ recPrevJournalNum = 9
+)
+
+type cpRecord struct {
+ level int
+ ikey internalKey
+}
+
+type atRecord struct {
+ level int
+ num int64
+ size int64
+ imin internalKey
+ imax internalKey
+}
+
+type dtRecord struct {
+ level int
+ num int64
+}
+
+type sessionRecord struct {
+ hasRec int
+ comparer string
+ journalNum int64
+ prevJournalNum int64
+ nextFileNum int64
+ seqNum uint64
+ compPtrs []cpRecord
+ addedTables []atRecord
+ deletedTables []dtRecord
+
+ scratch [binary.MaxVarintLen64]byte
+ err error
+}
+
+func (p *sessionRecord) has(rec int) bool {
+ return p.hasRec&(1<<uint(rec)) != 0
+}
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/journal"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+// Logging.
+
+type dropper struct {
+ s *session
+ fd storage.FileDesc
+}
+
+func (d dropper) Drop(err error) {
+ if e, ok := err.(*journal.ErrCorrupted); ok {
+ d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason)
+ } else {
+ d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err)
+ }
+}
+
+func (s *session) log(v ...interface{}) { s.stor.Log(fmt.Sprint(v...)) }
+func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf(format, v...)) }
+
+// File utils.
+
+func (s *session) newTemp() storage.FileDesc {
+ num := atomic.AddInt64(&s.stTempFileNum, 1) - 1
+ return storage.FileDesc{Type: storage.TypeTemp, Num: num}
+}
+
+func (s *session) addFileRef(fd storage.FileDesc, ref int) int {
+ ref += s.fileRef[fd.Num]
+ if ref > 0 {
+ s.fileRef[fd.Num] = ref
+ } else if ref == 0 {
+ delete(s.fileRef, fd.Num)
+ } else {
+ panic(fmt.Sprintf("negative ref: %v", fd))
+ }
+ return ref
+}
+
+// Session state.
+
+// Get current version. This will incr version ref, must call
+// version.release (exactly once) after use.
+func (s *session) version() *version {
+ s.vmu.Lock()
+ defer s.vmu.Unlock()
+ s.stVersion.incref()
+ return s.stVersion
+}
+
+func (s *session) tLen(level int) int {
+ s.vmu.Lock()
+ defer s.vmu.Unlock()
+ return s.stVersion.tLen(level)
+}
+
+// Set current version to v.
+func (s *session) setVersion(v *version) {
+ s.vmu.Lock()
+ defer s.vmu.Unlock()
+ // Held by the session. It is important to call this before releasing the
+ // current version, otherwise files that are still in use might get released.
+ v.incref()
+ if s.stVersion != nil {
+ // Release current version.
+ s.stVersion.releaseNB()
+ }
+ s.stVersion = v
+}
+
+// Get current unused file number.
+func (s *session) nextFileNum() int64 {
+ return atomic.LoadInt64(&s.stNextFileNum)
+}
+
+// Set current unused file number to num.
+func (s *session) setNextFileNum(num int64) {
+ atomic.StoreInt64(&s.stNextFileNum, num)
+}
+
+// Mark file number as used.
+func (s *session) markFileNum(num int64) {
+ nextFileNum := num + 1
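+ // The CAS loop only ever moves stNextFileNum forward, so concurrent
+ // callers can never roll the counter back.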
+ for {
+ old, x := s.stNextFileNum, nextFileNum
+ if old > x {
+ x = old
+ }
+ if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
+ break
+ }
+ }
+}
+
+// Allocate a file number.
+func (s *session) allocFileNum() int64 {
+ return atomic.AddInt64(&s.stNextFileNum, 1) - 1
+}
+
+// Reuse given file number.
+func (s *session) reuseFileNum(num int64) {
+ for {
+ old, x := s.stNextFileNum, num
+ if old != x+1 {
+ x = old
+ }
+ if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
+ break
+ }
+ }
+}
+
+// Set compaction ptr at given level; need external synchronization.
+func (s *session) setCompPtr(level int, ik internalKey) {
+ if level >= len(s.stCompPtrs) {
+ newCompPtrs := make([]internalKey, level+1)
+ copy(newCompPtrs, s.stCompPtrs)
+ s.stCompPtrs = newCompPtrs
+ }
+ s.stCompPtrs[level] = append(internalKey{}, ik...)
+}
+
+// Get compaction ptr at given level; need external synchronization.
+func (s *session) getCompPtr(level int) internalKey {
+ if level >= len(s.stCompPtrs) {
+ return nil
+ }
+ return s.stCompPtrs[level]
+}
+
+// Manifest related utils.
+
+// Fill given session record obj with current states; need external
+// synchronization.
+func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
+ r.setNextFileNum(s.nextFileNum())
+
+ if snapshot {
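+ // A snapshot record (written when creating a fresh manifest) must carry
+ // the full session state; incremental flushes only need nextFileNum plus
+ // whatever the caller already set on the record.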
+ if !r.has(recJournalNum) {
+ r.setJournalNum(s.stJournalNum)
+ }
+
+ if !r.has(recSeqNum) {
+ r.setSeqNum(s.stSeqNum)
+ }
+
+ for level, ik := range s.stCompPtrs {
+ if ik != nil {
+ r.addCompPtr(level, ik)
+ }
+ }
+
+ r.setComparer(s.icmp.uName())
+ }
+}
+
+// Marks the record as committed and updates the session state accordingly;
+// need external synchronization.
+func (s *session) recordCommited(rec *sessionRecord) {
+ if rec.has(recJournalNum) {
+ s.stJournalNum = rec.journalNum
+ }
+
+ if rec.has(recPrevJournalNum) {
+ s.stPrevJournalNum = rec.prevJournalNum
+ }
+
+ if rec.has(recSeqNum) {
+ s.stSeqNum = rec.seqNum
+ }
+
+ for _, r := range rec.compPtrs {
+ s.setCompPtr(r.level, internalKey(r.ikey))
+ }
+}
+
+// Create a new manifest file; need external synchronization.
+func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
+ fd := storage.FileDesc{Type: storage.TypeManifest, Num: s.allocFileNum()}
+ writer, err := s.stor.Create(fd)
+ if err != nil {
+ return
+ }
+ jw := journal.NewWriter(writer)
+
+ if v == nil {
+ v = s.version()
+ defer v.release()
+ }
+ if rec == nil {
+ rec = &sessionRecord{}
+ }
+ s.fillRecord(rec, true)
+ v.fillRecord(rec)
+
+ defer func() {
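+ // On success, swap in the new manifest and remove the old one; on
+ // failure, discard the half-written file and recycle its file number.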
+ if err == nil {
+ s.recordCommited(rec)
+ if s.manifest != nil {
+ s.manifest.Close()
+ }
+ if s.manifestWriter != nil {
+ s.manifestWriter.Close()
+ }
+ if !s.manifestFd.Zero() {
+ s.stor.Remove(s.manifestFd)
+ }
+ s.manifestFd = fd
+ s.manifestWriter = writer
+ s.manifest = jw
+ } else {
+ writer.Close()
+ s.stor.Remove(fd)
+ s.reuseFileNum(fd.Num)
+ }
+ }()
+
+ w, err := jw.Next()
+ if err != nil {
+ return
+ }
+ err = rec.encode(w)
+ if err != nil {
+ return
+ }
+ err = jw.Flush()
+ if err != nil {
+ return
+ }
+ err = s.stor.SetMeta(fd)
+ return
+}
+
+// Flush record to disk.
+func (s *session) flushManifest(rec *sessionRecord) (err error) {
+ s.fillRecord(rec, false)
+ w, err := s.manifest.Next()
+ if err != nil {
+ return
+ }
+ err = rec.encode(w)
+ if err != nil {
+ return
+ }
+ err = s.manifest.Flush()
+ if err != nil {
+ return
+ }
+ if !s.o.GetNoSync() {
+ err = s.manifestWriter.Sync()
+ if err != nil {
+ return
+ }
+ }
+ s.recordCommited(rec)
+ return
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage.go
new file mode 100644
index 00000000..d45fb5df
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage.go
@@ -0,0 +1,63 @@
+package leveldb
+
+import (
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "sync/atomic"
+)
+
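+// iStorage wraps a storage.Storage and counts the bytes read and written
+// through it, so callers can report basic I/O statistics.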
+type iStorage struct {
+ storage.Storage
+ read uint64
+ write uint64
+}
+
+func (c *iStorage) Open(fd storage.FileDesc) (storage.Reader, error) {
+ r, err := c.Storage.Open(fd)
+ return &iStorageReader{r, c}, err
+}
+
+func (c *iStorage) Create(fd storage.FileDesc) (storage.Writer, error) {
+ w, err := c.Storage.Create(fd)
+ return &iStorageWriter{w, c}, err
+}
+
+func (c *iStorage) reads() uint64 {
+ return atomic.LoadUint64(&c.read)
+}
+
+func (c *iStorage) writes() uint64 {
+ return atomic.LoadUint64(&c.write)
+}
+
+// newIStorage returns the given storage wrapped by iStorage.
+func newIStorage(s storage.Storage) *iStorage {
+ return &iStorage{s, 0, 0}
+}
+
+type iStorageReader struct {
+ storage.Reader
+ c *iStorage
+}
+
+func (r *iStorageReader) Read(p []byte) (n int, err error) {
+ n, err = r.Reader.Read(p)
+ atomic.AddUint64(&r.c.read, uint64(n))
+ return n, err
+}
+
+func (r *iStorageReader) ReadAt(p []byte, off int64) (n int, err error) {
+ n, err = r.Reader.ReadAt(p, off)
+ atomic.AddUint64(&r.c.read, uint64(n))
+ return n, err
+}
+
+type iStorageWriter struct {
+ storage.Writer
+ c *iStorage
+}
+
+func (w *iStorageWriter) Write(p []byte) (n int, err error) {
+ n, err = w.Writer.Write(p)
+ atomic.AddUint64(&w.c.write, uint64(n))
+ return n, err
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
new file mode 100644
index 00000000..9ba71fd6
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go
@@ -0,0 +1,671 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "runtime"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+)
+
+var (
+ errFileOpen = errors.New("leveldb/storage: file still open")
+ errReadOnly = errors.New("leveldb/storage: storage is read-only")
+)
+
+type fileLock interface {
+ release() error
+}
+
+type fileStorageLock struct {
+ fs *fileStorage
+}
+
+func (lock *fileStorageLock) Unlock() {
+ if lock.fs != nil {
+ lock.fs.mu.Lock()
+ defer lock.fs.mu.Unlock()
+ if lock.fs.slock == lock {
+ lock.fs.slock = nil
+ }
+ }
+}
+
+type int64Slice []int64
+
+func (p int64Slice) Len() int { return len(p) }
+func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] }
+func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+func writeFileSynced(filename string, data []byte, perm os.FileMode) error {
+ f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
+ if err != nil {
+ return err
+ }
+ n, err := f.Write(data)
+ if err == nil && n < len(data) {
+ err = io.ErrShortWrite
+ }
+ if err1 := f.Sync(); err == nil {
+ err = err1
+ }
+ if err1 := f.Close(); err == nil {
+ err = err1
+ }
+ return err
+}
+
+const logSizeThreshold = 1024 * 1024 // 1 MiB
+
+// fileStorage is a file-system backed storage.
+type fileStorage struct {
+ path string
+ readOnly bool
+
+ mu sync.Mutex
+ flock fileLock
+ slock *fileStorageLock
+ logw *os.File
+ logSize int64
+ buf []byte
+ // Opened file counter; if open < 0 means closed.
+ open int
+ day int
+}
+
+// OpenFile returns a new filesystem-backed storage implementation with the given
+// path. This also acquires a file lock, so any subsequent attempt to open the
+// same path will fail.
+//
+// The storage must be closed after use, by calling Close method.
+func OpenFile(path string, readOnly bool) (Storage, error) {
+ if fi, err := os.Stat(path); err == nil {
+ if !fi.IsDir() {
+ return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path)
+ }
+ } else if os.IsNotExist(err) && !readOnly {
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return nil, err
+ }
+ } else {
+ return nil, err
+ }
+
+ flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly)
+ if err != nil {
+ return nil, err
+ }
+
+ defer func() {
+ if err != nil {
+ flock.release()
+ }
+ }()
+
+ var (
+ logw *os.File
+ logSize int64
+ )
+ if !readOnly {
+ logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
+ if err != nil {
+ return nil, err
+ }
+ logSize, err = logw.Seek(0, os.SEEK_END)
+ if err != nil {
+ logw.Close()
+ return nil, err
+ }
+ }
+
+ fs := &fileStorage{
+ path: path,
+ readOnly: readOnly,
+ flock: flock,
+ logw: logw,
+ logSize: logSize,
+ }
+ runtime.SetFinalizer(fs, (*fileStorage).Close)
+ return fs, nil
+}
+
+func (fs *fileStorage) Lock() (Locker, error) {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ if fs.readOnly {
+ return &fileStorageLock{}, nil
+ }
+ if fs.slock != nil {
+ return nil, ErrLocked
+ }
+ fs.slock = &fileStorageLock{fs: fs}
+ return fs.slock, nil
+}
+
+func itoa(buf []byte, i int, wid int) []byte {
+ u := uint(i)
+ if u == 0 && wid <= 1 {
+ return append(buf, '0')
+ }
+
+ // Assemble decimal in reverse order.
+ var b [32]byte
+ bp := len(b)
+ for ; u > 0 || wid > 0; u /= 10 {
+ bp--
+ wid--
+ b[bp] = byte(u%10) + '0'
+ }
+ return append(buf, b[bp:]...)
+}
+
+func (fs *fileStorage) printDay(t time.Time) {
+ if fs.day == t.Day() {
+ return
+ }
+ fs.day = t.Day()
+ fs.logw.Write([]byte("=============== " + t.Format("Jan 2, 2006 (MST)") + " ===============\n"))
+}
+
+func (fs *fileStorage) doLog(t time.Time, str string) {
+ if fs.logSize > logSizeThreshold {
+ // Rotate log file.
+ fs.logw.Close()
+ fs.logw = nil
+ fs.logSize = 0
+ rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old"))
+ }
+ if fs.logw == nil {
+ var err error
+ fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
+ if err != nil {
+ return
+ }
+ // Force printDay on new log file.
+ fs.day = 0
+ }
+ fs.printDay(t)
+ hour, min, sec := t.Clock()
+ msec := t.Nanosecond() / 1e3
+ // time
+ fs.buf = itoa(fs.buf[:0], hour, 2)
+ fs.buf = append(fs.buf, ':')
+ fs.buf = itoa(fs.buf, min, 2)
+ fs.buf = append(fs.buf, ':')
+ fs.buf = itoa(fs.buf, sec, 2)
+ fs.buf = append(fs.buf, '.')
+ fs.buf = itoa(fs.buf, msec, 6)
+ fs.buf = append(fs.buf, ' ')
+ // write
+ fs.buf = append(fs.buf, []byte(str)...)
+ fs.buf = append(fs.buf, '\n')
+ n, _ := fs.logw.Write(fs.buf)
+ fs.logSize += int64(n)
+}
+
+func (fs *fileStorage) Log(str string) {
+ if !fs.readOnly {
+ t := time.Now()
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return
+ }
+ fs.doLog(t, str)
+ }
+}
+
+func (fs *fileStorage) log(str string) {
+ if !fs.readOnly {
+ fs.doLog(time.Now(), str)
+ }
+}
+
+func (fs *fileStorage) setMeta(fd FileDesc) error {
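+ // Point CURRENT at the given manifest atomically: back up the old
+ // CURRENT, write a synced CURRENT.<num>, rename it over CURRENT, then
+ // sync the directory (no-op when CURRENT already matches).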
+ content := fsGenName(fd) + "\n"
+ // Check and backup old CURRENT file.
+ currentPath := filepath.Join(fs.path, "CURRENT")
+ if _, err := os.Stat(currentPath); err == nil {
+ b, err := ioutil.ReadFile(currentPath)
+ if err != nil {
+ fs.log(fmt.Sprintf("backup CURRENT: %v", err))
+ return err
+ }
+ if string(b) == content {
+ // Content not changed, do nothing.
+ return nil
+ }
+ if err := writeFileSynced(currentPath+".bak", b, 0644); err != nil {
+ fs.log(fmt.Sprintf("backup CURRENT: %v", err))
+ return err
+ }
+ } else if !os.IsNotExist(err) {
+ return err
+ }
+ path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
+ if err := writeFileSynced(path, []byte(content), 0644); err != nil {
+ fs.log(fmt.Sprintf("create CURRENT.%d: %v", fd.Num, err))
+ return err
+ }
+ // Replace CURRENT file.
+ if err := rename(path, currentPath); err != nil {
+ fs.log(fmt.Sprintf("rename CURRENT.%d: %v", fd.Num, err))
+ return err
+ }
+ // Sync root directory.
+ if err := syncDir(fs.path); err != nil {
+ fs.log(fmt.Sprintf("syncDir: %v", err))
+ return err
+ }
+ return nil
+}
+
+func (fs *fileStorage) SetMeta(fd FileDesc) error {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+ if fs.readOnly {
+ return errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ return fs.setMeta(fd)
+}
+
+func (fs *fileStorage) GetMeta() (FileDesc, error) {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return FileDesc{}, ErrClosed
+ }
+ dir, err := os.Open(fs.path)
+ if err != nil {
+ return FileDesc{}, err
+ }
+ names, err := dir.Readdirnames(0)
+ // Close the dir first before checking for Readdirnames error.
+ if ce := dir.Close(); ce != nil {
+ fs.log(fmt.Sprintf("close dir: %v", ce))
+ }
+ if err != nil {
+ return FileDesc{}, err
+ }
+ // Try this in order:
+ // - CURRENT.[0-9]+ ('pending rename' file, descending order)
+ // - CURRENT
+ // - CURRENT.bak
+ //
+ // Skip corrupted files or files that point to a missing target file.
+ type currentFile struct {
+ name string
+ fd FileDesc
+ }
+ tryCurrent := func(name string) (*currentFile, error) {
+ b, err := ioutil.ReadFile(filepath.Join(fs.path, name))
+ if err != nil {
+ if os.IsNotExist(err) {
+ err = os.ErrNotExist
+ }
+ return nil, err
+ }
+ var fd FileDesc
+ if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd) {
+ fs.log(fmt.Sprintf("%s: corrupted content: %q", name, b))
+ err := &ErrCorrupted{
+ Err: errors.New("leveldb/storage: corrupted or incomplete CURRENT file"),
+ }
+ return nil, err
+ }
+ if _, err := os.Stat(filepath.Join(fs.path, fsGenName(fd))); err != nil {
+ if os.IsNotExist(err) {
+ fs.log(fmt.Sprintf("%s: missing target file: %s", name, fd))
+ err = os.ErrNotExist
+ }
+ return nil, err
+ }
+ return &currentFile{name: name, fd: fd}, nil
+ }
+ tryCurrents := func(names []string) (*currentFile, error) {
+ var (
+ cur *currentFile
+ // Last corruption error.
+ lastCerr error
+ )
+ for _, name := range names {
+ var err error
+ cur, err = tryCurrent(name)
+ if err == nil {
+ break
+ } else if err == os.ErrNotExist {
+ // Fallback to the next file.
+ } else if isCorrupted(err) {
+ lastCerr = err
+ // Fallback to the next file.
+ } else {
+ // In case the error is due to permission, etc.
+ return nil, err
+ }
+ }
+ if cur == nil {
+ err := os.ErrNotExist
+ if lastCerr != nil {
+ err = lastCerr
+ }
+ return nil, err
+ }
+ return cur, nil
+ }
+
+ // Try 'pending rename' files.
+ var nums []int64
+ for _, name := range names {
+ if strings.HasPrefix(name, "CURRENT.") && name != "CURRENT.bak" {
+ i, err := strconv.ParseInt(name[8:], 10, 64)
+ if err == nil {
+ nums = append(nums, i)
+ }
+ }
+ }
+ var (
+ pendCur *currentFile
+ pendErr = os.ErrNotExist
+ pendNames []string
+ )
+ if len(nums) > 0 {
+ sort.Sort(sort.Reverse(int64Slice(nums)))
+ pendNames = make([]string, len(nums))
+ for i, num := range nums {
+ pendNames[i] = fmt.Sprintf("CURRENT.%d", num)
+ }
+ pendCur, pendErr = tryCurrents(pendNames)
+ if pendErr != nil && pendErr != os.ErrNotExist && !isCorrupted(pendErr) {
+ return FileDesc{}, pendErr
+ }
+ }
+
+ // Try CURRENT and CURRENT.bak.
+ curCur, curErr := tryCurrents([]string{"CURRENT", "CURRENT.bak"})
+ if curErr != nil && curErr != os.ErrNotExist && !isCorrupted(curErr) {
+ return FileDesc{}, curErr
+ }
+
+ // pendCur takes precedence, but guards against obsolete pendCur.
+ if pendCur != nil && (curCur == nil || pendCur.fd.Num > curCur.fd.Num) {
+ curCur = pendCur
+ }
+
+ if curCur != nil {
+ // Restore CURRENT file to proper state.
+ if !fs.readOnly && (curCur.name != "CURRENT" || len(pendNames) != 0) {
+ // Ignore setMeta errors; however, don't delete obsolete files if we
+ // catch an error.
+ if err := fs.setMeta(curCur.fd); err == nil {
+ // Remove 'pending rename' files.
+ for _, name := range pendNames {
+ if err := os.Remove(filepath.Join(fs.path, name)); err != nil {
+ fs.log(fmt.Sprintf("remove %s: %v", name, err))
+ }
+ }
+ }
+ }
+ return curCur.fd, nil
+ }
+
+ // Nothing found.
+ if isCorrupted(pendErr) {
+ return FileDesc{}, pendErr
+ }
+ return FileDesc{}, curErr
+}
+
+func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ dir, err := os.Open(fs.path)
+ if err != nil {
+ return
+ }
+ names, err := dir.Readdirnames(0)
+ // Close the dir first before checking for Readdirnames error.
+ if cerr := dir.Close(); cerr != nil {
+ fs.log(fmt.Sprintf("close dir: %v", cerr))
+ }
+ if err == nil {
+ for _, name := range names {
+ if fd, ok := fsParseName(name); ok && fd.Type&ft != 0 {
+ fds = append(fds, fd)
+ }
+ }
+ }
+ return
+}
+
+func (fs *fileStorage) Open(fd FileDesc) (Reader, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0)
+ if err != nil {
+ if fsHasOldName(fd) && os.IsNotExist(err) {
+ of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0)
+ if err == nil {
+ goto ok
+ }
+ }
+ return nil, err
+ }
+ok:
+ fs.open++
+ return &fileWrap{File: of, fs: fs, fd: fd}, nil
+}
+
+func (fs *fileStorage) Create(fd FileDesc) (Writer, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+ if fs.readOnly {
+ return nil, errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return nil, ErrClosed
+ }
+ of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ return nil, err
+ }
+ fs.open++
+ return &fileWrap{File: of, fs: fs, fd: fd}, nil
+}
+
+func (fs *fileStorage) Remove(fd FileDesc) error {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+ if fs.readOnly {
+ return errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ err := os.Remove(filepath.Join(fs.path, fsGenName(fd)))
+ if err != nil {
+ if fsHasOldName(fd) && os.IsNotExist(err) {
+ if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) {
+ fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, err))
+ err = e1
+ }
+ } else {
+ fs.log(fmt.Sprintf("remove %s: %v", fd, err))
+ }
+ }
+ return err
+}
+
+func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error {
+ if !FileDescOk(oldfd) || !FileDescOk(newfd) {
+ return ErrInvalidFile
+ }
+ if oldfd == newfd {
+ return nil
+ }
+ if fs.readOnly {
+ return errReadOnly
+ }
+
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ return rename(filepath.Join(fs.path, fsGenName(oldfd)), filepath.Join(fs.path, fsGenName(newfd)))
+}
+
+func (fs *fileStorage) Close() error {
+ fs.mu.Lock()
+ defer fs.mu.Unlock()
+ if fs.open < 0 {
+ return ErrClosed
+ }
+ // Clear the finalizer.
+ runtime.SetFinalizer(fs, nil)
+
+ if fs.open > 0 {
+ fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open))
+ }
+ fs.open = -1
+ if fs.logw != nil {
+ fs.logw.Close()
+ }
+ return fs.flock.release()
+}
+
+type fileWrap struct {
+ *os.File
+ fs *fileStorage
+ fd FileDesc
+ closed bool
+}
+
+func (fw *fileWrap) Sync() error {
+ if err := fw.File.Sync(); err != nil {
+ return err
+ }
+ if fw.fd.Type == TypeManifest {
+ // Also sync parent directory if file type is manifest.
+ // See: https://code.google.com/p/leveldb/issues/detail?id=190.
+ if err := syncDir(fw.fs.path); err != nil {
+ fw.fs.log(fmt.Sprintf("syncDir: %v", err))
+ return err
+ }
+ }
+ return nil
+}
+
+func (fw *fileWrap) Close() error {
+ fw.fs.mu.Lock()
+ defer fw.fs.mu.Unlock()
+ if fw.closed {
+ return ErrClosed
+ }
+ fw.closed = true
+ fw.fs.open--
+ err := fw.File.Close()
+ if err != nil {
+ fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err))
+ }
+ return err
+}
+
+func fsGenName(fd FileDesc) string {
+ switch fd.Type {
+ case TypeManifest:
+ return fmt.Sprintf("MANIFEST-%06d", fd.Num)
+ case TypeJournal:
+ return fmt.Sprintf("%06d.log", fd.Num)
+ case TypeTable:
+ return fmt.Sprintf("%06d.ldb", fd.Num)
+ case TypeTemp:
+ return fmt.Sprintf("%06d.tmp", fd.Num)
+ default:
+ panic("invalid file type")
+ }
+}
+
+func fsHasOldName(fd FileDesc) bool {
+ return fd.Type == TypeTable
+}
+
+func fsGenOldName(fd FileDesc) string {
+ switch fd.Type {
+ case TypeTable:
+ return fmt.Sprintf("%06d.sst", fd.Num)
+ }
+ return fsGenName(fd)
+}
+
+func fsParseName(name string) (fd FileDesc, ok bool) {
+ var tail string
+ _, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail)
+ if err == nil {
+ switch tail {
+ case "log":
+ fd.Type = TypeJournal
+ case "ldb", "sst":
+ fd.Type = TypeTable
+ case "tmp":
+ fd.Type = TypeTemp
+ default:
+ return
+ }
+ return fd, true
+ }
+ n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail)
+ if n == 1 {
+ fd.Type = TypeManifest
+ return fd, true
+ }
+ return
+}
+
+func fsParseNamePtr(name string, fd *FileDesc) bool {
+ _fd, ok := fsParseName(name)
+ if fd != nil {
+ *fd = _fd
+ }
+ return ok
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go
new file mode 100644
index 00000000..5545aeef
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go
@@ -0,0 +1,34 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build nacl
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ return nil, syscall.ENOTSUP
+}
+
+func setFileLock(f *os.File, readOnly, lock bool) error {
+ return syscall.ENOTSUP
+}
+
+func rename(oldpath, newpath string) error {
+ return syscall.ENOTSUP
+}
+
+func isErrInvalid(err error) bool {
+ return false
+}
+
+func syncDir(name string) error {
+ return syscall.ENOTSUP
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go
new file mode 100644
index 00000000..b8297980
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go
@@ -0,0 +1,63 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "os"
+)
+
+type plan9FileLock struct {
+ f *os.File
+}
+
+func (fl *plan9FileLock) release() error {
+ return fl.f.Close()
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var (
+ flag int
+ perm os.FileMode
+ )
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ perm = os.ModeExclusive
+ }
+ f, err := os.OpenFile(path, flag, perm)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644)
+ }
+ if err != nil {
+ return
+ }
+ fl = &plan9FileLock{f: f}
+ return
+}
+
+func rename(oldpath, newpath string) error {
+ if _, err := os.Stat(newpath); err == nil {
+ if err := os.Remove(newpath); err != nil {
+ return err
+ }
+ }
+
+ return os.Rename(oldpath, newpath)
+}
+
+func syncDir(name string) error {
+ f, err := os.Open(name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if err := f.Sync(); err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
new file mode 100644
index 00000000..79901ee4
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go
@@ -0,0 +1,81 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build solaris
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+type unixFileLock struct {
+ f *os.File
+}
+
+func (fl *unixFileLock) release() error {
+ if err := setFileLock(fl.f, false, false); err != nil {
+ return err
+ }
+ return fl.f.Close()
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var flag int
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ }
+ f, err := os.OpenFile(path, flag, 0)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
+ }
+ if err != nil {
+ return
+ }
+ err = setFileLock(f, readOnly, true)
+ if err != nil {
+ f.Close()
+ return
+ }
+ fl = &unixFileLock{f: f}
+ return
+}
+
+func setFileLock(f *os.File, readOnly, lock bool) error {
+ flock := syscall.Flock_t{
+ Type: syscall.F_UNLCK,
+ Start: 0,
+ Len: 0,
+ Whence: 1,
+ }
+ if lock {
+ if readOnly {
+ flock.Type = syscall.F_RDLCK
+ } else {
+ flock.Type = syscall.F_WRLCK
+ }
+ }
+ return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock)
+}
+
+func rename(oldpath, newpath string) error {
+ return os.Rename(oldpath, newpath)
+}
+
+func syncDir(name string) error {
+ f, err := os.Open(name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if err := f.Sync(); err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
new file mode 100644
index 00000000..d75f66a9
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go
@@ -0,0 +1,98 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+package storage
+
+import (
+ "os"
+ "syscall"
+)
+
+type unixFileLock struct {
+ f *os.File
+}
+
+func (fl *unixFileLock) release() error {
+ if err := setFileLock(fl.f, false, false); err != nil {
+ return err
+ }
+ return fl.f.Close()
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ var flag int
+ if readOnly {
+ flag = os.O_RDONLY
+ } else {
+ flag = os.O_RDWR
+ }
+ f, err := os.OpenFile(path, flag, 0)
+ if os.IsNotExist(err) {
+ f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
+ }
+ if err != nil {
+ return
+ }
+ err = setFileLock(f, readOnly, true)
+ if err != nil {
+ f.Close()
+ return
+ }
+ fl = &unixFileLock{f: f}
+ return
+}
+
+func setFileLock(f *os.File, readOnly, lock bool) error {
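+ // Take (or release, when lock is false) a non-blocking flock on the whole
+ // file: shared for read-only opens, exclusive otherwise.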
+ how := syscall.LOCK_UN
+ if lock {
+ if readOnly {
+ how = syscall.LOCK_SH
+ } else {
+ how = syscall.LOCK_EX
+ }
+ }
+ return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB)
+}
+
+func rename(oldpath, newpath string) error {
+ return os.Rename(oldpath, newpath)
+}
+
+func isErrInvalid(err error) bool {
+ if err == os.ErrInvalid {
+ return true
+ }
+ // Go < 1.8
+ if syserr, ok := err.(*os.SyscallError); ok && syserr.Err == syscall.EINVAL {
+ return true
+ }
+ // Go >= 1.8 returns *os.PathError instead
+ if patherr, ok := err.(*os.PathError); ok && patherr.Err == syscall.EINVAL {
+ return true
+ }
+ return false
+}
+
+func syncDir(name string) error {
+ // As per the fsync manpage, Linux seems to expect fsync on the directory;
+ // however, some systems don't support this, so we will ignore syscall.EINVAL.
+ //
+ // From fsync(2):
+ // Calling fsync() does not necessarily ensure that the entry in the
+ // directory containing the file has also reached disk. For that an
+ // explicit fsync() on a file descriptor for the directory is also needed.
+ f, err := os.Open(name)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if err := f.Sync(); err != nil && !isErrInvalid(err) {
+ return err
+ }
+ return nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go
new file mode 100644
index 00000000..899335fd
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go
@@ -0,0 +1,78 @@
+// Copyright (c) 2013, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+var (
+ modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+ procMoveFileExW = modkernel32.NewProc("MoveFileExW")
+)
+
+const (
+ _MOVEFILE_REPLACE_EXISTING = 1
+)
+
+type windowsFileLock struct {
+ fd syscall.Handle
+}
+
+func (fl *windowsFileLock) release() error {
+ return syscall.Close(fl.fd)
+}
+
+func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
+ pathp, err := syscall.UTF16PtrFromString(path)
+ if err != nil {
+ return
+ }
+ var access, shareMode uint32
+ if readOnly {
+ access = syscall.GENERIC_READ
+ shareMode = syscall.FILE_SHARE_READ
+ } else {
+ access = syscall.GENERIC_READ | syscall.GENERIC_WRITE
+ }
+ fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0)
+ if err == syscall.ERROR_FILE_NOT_FOUND {
+ fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0)
+ }
+ if err != nil {
+ return
+ }
+ fl = &windowsFileLock{fd: fd}
+ return
+}
+
+func moveFileEx(from *uint16, to *uint16, flags uint32) error {
+ r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags))
+ if r1 == 0 {
+ if e1 != 0 {
+ return error(e1)
+ }
+ return syscall.EINVAL
+ }
+ return nil
+}
+
+func rename(oldpath, newpath string) error {
+ from, err := syscall.UTF16PtrFromString(oldpath)
+ if err != nil {
+ return err
+ }
+ to, err := syscall.UTF16PtrFromString(newpath)
+ if err != nil {
+ return err
+ }
+ return moveFileEx(from, to, _MOVEFILE_REPLACE_EXISTING)
+}
+
+func syncDir(name string) error { return nil }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go
new file mode 100644
index 00000000..838f1bee
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go
@@ -0,0 +1,222 @@
+// Copyright (c) 2013, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package storage
+
+import (
+ "bytes"
+ "os"
+ "sync"
+)
+
+const typeShift = 4
+
+// Verify at compile-time that typeShift is large enough to cover all FileType
+// values by confirming that 0 == 0.
+var _ [0]struct{} = [TypeAll >> typeShift]struct{}{}
+
+type memStorageLock struct {
+ ms *memStorage
+}
+
+func (lock *memStorageLock) Unlock() {
+ ms := lock.ms
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if ms.slock == lock {
+ ms.slock = nil
+ }
+ return
+}
+
+// memStorage is a memory-backed storage.
+type memStorage struct {
+ mu sync.Mutex
+ slock *memStorageLock
+ files map[uint64]*memFile
+ meta FileDesc
+}
+
+// NewMemStorage returns a new memory-backed storage implementation.
+func NewMemStorage() Storage {
+ return &memStorage{
+ files: make(map[uint64]*memFile),
+ }
+}
+
+func (ms *memStorage) Lock() (Locker, error) {
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if ms.slock != nil {
+ return nil, ErrLocked
+ }
+ ms.slock = &memStorageLock{ms: ms}
+ return ms.slock, nil
+}
+
+func (*memStorage) Log(str string) {}
+
+func (ms *memStorage) SetMeta(fd FileDesc) error {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+
+ ms.mu.Lock()
+ ms.meta = fd
+ ms.mu.Unlock()
+ return nil
+}
+
+func (ms *memStorage) GetMeta() (FileDesc, error) {
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if ms.meta.Zero() {
+ return FileDesc{}, os.ErrNotExist
+ }
+ return ms.meta, nil
+}
+
+func (ms *memStorage) List(ft FileType) ([]FileDesc, error) {
+ ms.mu.Lock()
+ var fds []FileDesc
+ for x := range ms.files {
+ fd := unpackFile(x)
+ if fd.Type&ft != 0 {
+ fds = append(fds, fd)
+ }
+ }
+ ms.mu.Unlock()
+ return fds, nil
+}
+
+func (ms *memStorage) Open(fd FileDesc) (Reader, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if m, exist := ms.files[packFile(fd)]; exist {
+ if m.open {
+ return nil, errFileOpen
+ }
+ m.open = true
+ return &memReader{Reader: bytes.NewReader(m.Bytes()), ms: ms, m: m}, nil
+ }
+ return nil, os.ErrNotExist
+}
+
+func (ms *memStorage) Create(fd FileDesc) (Writer, error) {
+ if !FileDescOk(fd) {
+ return nil, ErrInvalidFile
+ }
+
+ x := packFile(fd)
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ m, exist := ms.files[x]
+ if exist {
+ if m.open {
+ return nil, errFileOpen
+ }
+ m.Reset()
+ } else {
+ m = &memFile{}
+ ms.files[x] = m
+ }
+ m.open = true
+ return &memWriter{memFile: m, ms: ms}, nil
+}
+
+func (ms *memStorage) Remove(fd FileDesc) error {
+ if !FileDescOk(fd) {
+ return ErrInvalidFile
+ }
+
+ x := packFile(fd)
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ if _, exist := ms.files[x]; exist {
+ delete(ms.files, x)
+ return nil
+ }
+ return os.ErrNotExist
+}
+
+func (ms *memStorage) Rename(oldfd, newfd FileDesc) error {
+ if !FileDescOk(oldfd) || !FileDescOk(newfd) {
+ return ErrInvalidFile
+ }
+ if oldfd == newfd {
+ return nil
+ }
+
+ oldx := packFile(oldfd)
+ newx := packFile(newfd)
+ ms.mu.Lock()
+ defer ms.mu.Unlock()
+ oldm, exist := ms.files[oldx]
+ if !exist {
+ return os.ErrNotExist
+ }
+ newm, exist := ms.files[newx]
+ if (exist && newm.open) || oldm.open {
+ return errFileOpen
+ }
+ delete(ms.files, oldx)
+ ms.files[newx] = oldm
+ return nil
+}
+
+func (*memStorage) Close() error { return nil }
+
+type memFile struct {
+ bytes.Buffer
+ open bool
+}
+
+type memReader struct {
+ *bytes.Reader
+ ms *memStorage
+ m *memFile
+ closed bool
+}
+
+func (mr *memReader) Close() error {
+ mr.ms.mu.Lock()
+ defer mr.ms.mu.Unlock()
+ if mr.closed {
+ return ErrClosed
+ }
+ mr.m.open = false
+ return nil
+}
+
+type memWriter struct {
+ *memFile
+ ms *memStorage
+ closed bool
+}
+
+func (*memWriter) Sync() error { return nil }
+
+func (mw *memWriter) Close() error {
+ mw.ms.mu.Lock()
+ defer mw.ms.mu.Unlock()
+ if mw.closed {
+ return ErrClosed
+ }
+ mw.memFile.open = false
+ return nil
+}
+
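+// packFile and unpackFile fold a FileDesc into a single uint64 map key:
+// the file number occupies the high bits and the FileType the low typeShift
+// bits.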
+func packFile(fd FileDesc) uint64 {
+ return uint64(fd.Num)<<typeShift | uint64(fd.Type)
+}
+
+func unpackFile(x uint64) FileDesc {
+ return FileDesc{FileType(x) & TypeAll, int64(x >> typeShift)}
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go
new file mode 100644
index 00000000..4e4a7242
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go
@@ -0,0 +1,187 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package storage provides storage abstraction for LevelDB.
+package storage
+
+import (
+ "errors"
+ "fmt"
+ "io"
+)
+
+// FileType represents a file type.
+type FileType int
+
+// File types.
+const (
+ TypeManifest FileType = 1 << iota
+ TypeJournal
+ TypeTable
+ TypeTemp
+
+ TypeAll = TypeManifest | TypeJournal | TypeTable | TypeTemp
+)
+
+func (t FileType) String() string {
+ switch t {
+ case TypeManifest:
+ return "manifest"
+ case TypeJournal:
+ return "journal"
+ case TypeTable:
+ return "table"
+ case TypeTemp:
+ return "temp"
+ }
+ return fmt.Sprintf("", t)
+}
+
+// Common error.
+var (
+ ErrInvalidFile = errors.New("leveldb/storage: invalid file for argument")
+ ErrLocked = errors.New("leveldb/storage: already locked")
+ ErrClosed = errors.New("leveldb/storage: closed")
+)
+
+// ErrCorrupted is the type that wraps errors that indicate corruption of
+// a file. Package storage has its own type instead of using
+// errors.ErrCorrupted to prevent circular import.
+type ErrCorrupted struct {
+ Fd FileDesc
+ Err error
+}
+
+func isCorrupted(err error) bool {
+ switch err.(type) {
+ case *ErrCorrupted:
+ return true
+ }
+ return false
+}
+
+func (e *ErrCorrupted) Error() string {
+ if !e.Fd.Zero() {
+ return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
+ }
+ return e.Err.Error()
+}
+
+// Syncer is the interface that wraps basic Sync method.
+type Syncer interface {
+ // Sync commits the current contents of the file to stable storage.
+ Sync() error
+}
+
+// Reader is the interface that groups the basic Read, Seek, ReadAt and Close
+// methods.
+type Reader interface {
+ io.ReadSeeker
+ io.ReaderAt
+ io.Closer
+}
+
+// Writer is the interface that groups the basic Write, Sync and Close
+// methods.
+type Writer interface {
+ io.WriteCloser
+ Syncer
+}
+
+// Locker is the interface that wraps Unlock method.
+type Locker interface {
+ Unlock()
+}
+
+// FileDesc is a 'file descriptor'.
+type FileDesc struct {
+ Type FileType
+ Num int64
+}
+
+func (fd FileDesc) String() string {
+ switch fd.Type {
+ case TypeManifest:
+ return fmt.Sprintf("MANIFEST-%06d", fd.Num)
+ case TypeJournal:
+ return fmt.Sprintf("%06d.log", fd.Num)
+ case TypeTable:
+ return fmt.Sprintf("%06d.ldb", fd.Num)
+ case TypeTemp:
+ return fmt.Sprintf("%06d.tmp", fd.Num)
+ default:
+ return fmt.Sprintf("%#x-%d", fd.Type, fd.Num)
+ }
+}
+
+// Zero returns true if fd == (FileDesc{}).
+func (fd FileDesc) Zero() bool {
+ return fd == (FileDesc{})
+}
+
+// FileDescOk returns true if fd is a valid 'file descriptor'.
+func FileDescOk(fd FileDesc) bool {
+ switch fd.Type {
+ case TypeManifest:
+ case TypeJournal:
+ case TypeTable:
+ case TypeTemp:
+ default:
+ return false
+ }
+ return fd.Num >= 0
+}
+
+// Storage is the storage. A storage instance must be safe for concurrent use.
+type Storage interface {
+ // Lock locks the storage. Any subsequent attempt to call Lock will fail
+ // until the last lock is released.
+ // Caller should call Unlock method after use.
+ Lock() (Locker, error)
+
+ // Log logs a string. This is used for logging.
+ // An implementation may write to a file, stdout or simply do nothing.
+ Log(str string)
+
+ // SetMeta store 'file descriptor' that can later be acquired using GetMeta
+ // method. The 'file descriptor' should point to a valid file.
+ // SetMeta should be implemented in such way that changes should happen
+ // atomically.
+ SetMeta(fd FileDesc) error
+
+ // GetMeta returns 'file descriptor' stored in meta. The 'file descriptor'
+ // can be updated using SetMeta method.
+ // Returns os.ErrNotExist if meta doesn't store any 'file descriptor', or
+ // 'file descriptor' point to nonexistent file.
+ GetMeta() (FileDesc, error)
+
+ // List returns file descriptors that match the given file types.
+ // The file types may be OR'ed together.
+ List(ft FileType) ([]FileDesc, error)
+
+ // Open opens file with the given 'file descriptor' read-only.
+ // Returns os.ErrNotExist error if the file does not exist.
+ // Returns ErrClosed if the underlying storage is closed.
+ Open(fd FileDesc) (Reader, error)
+
+ // Create creates file with the given 'file descriptor', truncate if already
+ // exist and opens write-only.
+ // Returns ErrClosed if the underlying storage is closed.
+ Create(fd FileDesc) (Writer, error)
+
+ // Remove removes file with the given 'file descriptor'.
+ // Returns ErrClosed if the underlying storage is closed.
+ Remove(fd FileDesc) error
+
+ // Rename renames file from oldfd to newfd.
+ // Returns ErrClosed if the underlying storage is closed.
+ Rename(oldfd, newfd FileDesc) error
+
+ // Close closes the storage.
+ // It is valid to call Close multiple times. Other methods should not be
+ // called after the storage has been closed.
+ Close() error
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
new file mode 100644
index 00000000..1fac60d0
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
@@ -0,0 +1,531 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sort"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/table"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// tFile holds basic information about a table.
+type tFile struct {
+ fd storage.FileDesc
+ seekLeft int32
+ size int64
+ imin, imax internalKey
+}
+
+// Returns true if given key is after largest key of this table.
+func (t *tFile) after(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0
+}
+
+// Returns true if given key is before smallest key of this table.
+func (t *tFile) before(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0
+}
+
+// Returns true if given key range overlaps with this table key range.
+func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool {
+ return !t.after(icmp, umin) && !t.before(icmp, umax)
+}
+
+// Consumes one seek and returns the number of seeks left.
+func (t *tFile) consumeSeek() int32 {
+ return atomic.AddInt32(&t.seekLeft, -1)
+}
+
+// Creates new tFile.
+func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile {
+ f := &tFile{
+ fd: fd,
+ size: size,
+ imin: imin,
+ imax: imax,
+ }
+
+ // We arrange to automatically compact this file after
+ // a certain number of seeks. Let's assume:
+ // (1) One seek costs 10ms
+ // (2) Writing or reading 1MB costs 10ms (100MB/s)
+ // (3) A compaction of 1MB does 25MB of IO:
+ // 1MB read from this level
+ // 10-12MB read from next level (boundaries may be misaligned)
+ // 10-12MB written to next level
+ // This implies that 25 seeks cost the same as the compaction
+ // of 1MB of data. I.e., one seek costs approximately the
+ // same as the compaction of 40KB of data. We are a little
+ // conservative and allow approximately one seek for every 16KB
+ // of data before triggering a compaction.
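+ // For example: a 2 MiB table gets 2*1024*1024/16384 = 128 seeks before
+ // it becomes a compaction candidate, while any table smaller than
+ // roughly 1.6 MiB is clamped to the 100-seek floor below.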
+ f.seekLeft = int32(size / 16384)
+ if f.seekLeft < 100 {
+ f.seekLeft = 100
+ }
+
+ return f
+}
+
+func tableFileFromRecord(r atRecord) *tFile {
+ return newTableFile(storage.FileDesc{Type: storage.TypeTable, Num: r.num}, r.size, r.imin, r.imax)
+}
+
+// tFiles holds multiple tFile instances.
+type tFiles []*tFile
+
+func (tf tFiles) Len() int { return len(tf) }
+func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
+
+func (tf tFiles) nums() string {
+ x := "[ "
+ for i, f := range tf {
+ if i != 0 {
+ x += ", "
+ }
+ x += fmt.Sprint(f.fd.Num)
+ }
+ x += " ]"
+ return x
+}
+
+// Returns true if the smallest key of table i is less than that of table j.
+// This is used to sort tables by key in ascending order.
+func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
+ a, b := tf[i], tf[j]
+ n := icmp.Compare(a.imin, b.imin)
+ if n == 0 {
+ return a.fd.Num < b.fd.Num
+ }
+ return n < 0
+}
+
+// Returns true if the file number of table i is greater than that of table j.
+// This is used to sort tables by file number in descending order.
+func (tf tFiles) lessByNum(i, j int) bool {
+ return tf[i].fd.Num > tf[j].fd.Num
+}
+
+// Sorts tables by key in ascending order.
+func (tf tFiles) sortByKey(icmp *iComparer) {
+ sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp})
+}
+
+// Sorts tables by file number in descending order.
+func (tf tFiles) sortByNum() {
+ sort.Sort(&tFilesSortByNum{tFiles: tf})
+}
+
+// Returns the total size of all tables.
+func (tf tFiles) size() (sum int64) {
+ for _, t := range tf {
+ sum += t.size
+ }
+ return sum
+}
+
+// Returns the smallest index of the tables whose smallest
+// key is greater than or equal to the given key.
+func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imin, ikey) >= 0
+ })
+}
+
+// Returns the smallest index of the tables whose largest
+// key is greater than or equal to the given key.
+func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imax, ikey) >= 0
+ })
+}
+
+// Returns true if the given key range overlaps with the key range of one or
+// more tables. If unsorted is true then binary search will not be used.
+func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
+ if unsorted {
+ // Check against all files.
+ for _, t := range tf {
+ if t.overlaps(icmp, umin, umax) {
+ return true
+ }
+ }
+ return false
+ }
+
+ i := 0
+ if len(umin) > 0 {
+ // Find the earliest possible internal key for min.
+ i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
+ }
+ if i >= len(tf) {
+ // Beginning of range is after all files, so no overlap.
+ return false
+ }
+ return !tf[i].before(icmp, umax)
+}
+
+// Returns the tables whose key range overlaps with the given key range.
+// The range will be expanded if a user key is found to hop across tables.
+// If overlapped is true then the search will be restarted if umax is
+// expanded.
+// The dst content will be overwritten.
+func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
+ dst = dst[:0]
+ for i := 0; i < len(tf); {
+ t := tf[i]
+ if t.overlaps(icmp, umin, umax) {
+ if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
+ umin = t.imin.ukey()
+ dst = dst[:0]
+ i = 0
+ continue
+ } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
+ umax = t.imax.ukey()
+ // Restart search if it is overlapped.
+ if overlapped {
+ dst = dst[:0]
+ i = 0
+ continue
+ }
+ }
+
+ dst = append(dst, t)
+ }
+ i++
+ }
+
+ return dst
+}
+
+// Returns the key range covered by the tables.
+func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
+ for i, t := range tf {
+ if i == 0 {
+ imin, imax = t.imin, t.imax
+ continue
+ }
+ if icmp.Compare(t.imin, imin) < 0 {
+ imin = t.imin
+ }
+ if icmp.Compare(t.imax, imax) > 0 {
+ imax = t.imax
+ }
+ }
+
+ return
+}
+
+// Creates an iterator indexer over the tables.
+func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
+ if slice != nil {
+ var start, limit int
+ if slice.Start != nil {
+ start = tf.searchMax(icmp, internalKey(slice.Start))
+ }
+ if slice.Limit != nil {
+ limit = tf.searchMin(icmp, internalKey(slice.Limit))
+ } else {
+ limit = tf.Len()
+ }
+ tf = tf[start:limit]
+ }
+ return iterator.NewArrayIndexer(&tFilesArrayIndexer{
+ tFiles: tf,
+ tops: tops,
+ icmp: icmp,
+ slice: slice,
+ ro: ro,
+ })
+}
+
+// Tables iterator index.
+type tFilesArrayIndexer struct {
+ tFiles
+ tops *tOps
+ icmp *iComparer
+ slice *util.Range
+ ro *opt.ReadOptions
+}
+
+func (a *tFilesArrayIndexer) Search(key []byte) int {
+ return a.searchMax(a.icmp, internalKey(key))
+}
+
+func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
+ if i == 0 || i == a.Len()-1 {
+ return a.tops.newIterator(a.tFiles[i], a.slice, a.ro)
+ }
+ return a.tops.newIterator(a.tFiles[i], nil, a.ro)
+}
+
+// Helper type for sortByKey.
+type tFilesSortByKey struct {
+ tFiles
+ icmp *iComparer
+}
+
+func (x *tFilesSortByKey) Less(i, j int) bool {
+ return x.lessByKey(x.icmp, i, j)
+}
+
+// Helper type for sortByNum.
+type tFilesSortByNum struct {
+ tFiles
+}
+
+func (x *tFilesSortByNum) Less(i, j int) bool {
+ return x.lessByNum(i, j)
+}
+
+// Table operations.
+type tOps struct {
+ s *session
+ noSync bool
+ evictRemoved bool
+ cache *cache.Cache
+ bcache *cache.Cache
+ bpool *util.BufferPool
+}
+
+// Creates an empty table and returns table writer.
+func (t *tOps) create() (*tWriter, error) {
+ fd := storage.FileDesc{Type: storage.TypeTable, Num: t.s.allocFileNum()}
+ fw, err := t.s.stor.Create(fd)
+ if err != nil {
+ return nil, err
+ }
+ return &tWriter{
+ t: t,
+ fd: fd,
+ w: fw,
+ tw: table.NewWriter(fw, t.s.o.Options),
+ }, nil
+}
+
+// Builds table from src iterator.
+func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
+ w, err := t.create()
+ if err != nil {
+ return
+ }
+
+ defer func() {
+ if err != nil {
+ w.drop()
+ }
+ }()
+
+ for src.Next() {
+ err = w.append(src.Key(), src.Value())
+ if err != nil {
+ return
+ }
+ }
+ err = src.Error()
+ if err != nil {
+ return
+ }
+
+ n = w.tw.EntriesLen()
+ f, err = w.finish()
+ return
+}
+
+// Opens table. It returns a cache handle, which should
+// be released after use.
+func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
+ ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
+ var r storage.Reader
+ r, err = t.s.stor.Open(f.fd)
+ if err != nil {
+ return 0, nil
+ }
+
+ var bcache *cache.NamespaceGetter
+ if t.bcache != nil {
+ bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
+ }
+
+ var tr *table.Reader
+ tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
+ if err != nil {
+ r.Close()
+ return 0, nil
+ }
+ return 1, tr
+
+ })
+ if ch == nil && err == nil {
+ err = ErrClosed
+ }
+ return
+}
+
+// Finds key/value pair whose key is greater than or equal to the
+// given key.
+func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return nil, nil, err
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).Find(key, true, ro)
+}
+
+// Finds key that is greater than or equal to the given key.
+func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return nil, err
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).FindKey(key, true, ro)
+}
+
+// Returns approximate offset of the given key.
+func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).OffsetOf(key)
+}
+
+// Creates an iterator from the given table.
+func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ ch, err := t.open(f)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
+ iter.SetReleaser(ch)
+ return iter
+}
+
+// Removes the table from persistent storage. It waits until
+// no one uses the table.
+func (t *tOps) remove(f *tFile) {
+ t.cache.Delete(0, uint64(f.fd.Num), func() {
+ if err := t.s.stor.Remove(f.fd); err != nil {
+ t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
+ } else {
+ t.s.logf("table@remove removed @%d", f.fd.Num)
+ }
+ if t.evictRemoved && t.bcache != nil {
+ t.bcache.EvictNS(uint64(f.fd.Num))
+ }
+ })
+}
+
+// Closes the table ops instance. It will close all tables,
+// regardless of whether they are still in use.
+func (t *tOps) close() {
+ t.bpool.Close()
+ t.cache.Close()
+ if t.bcache != nil {
+ t.bcache.CloseWeak()
+ }
+}
+
+// Creates new initialized table ops instance.
+func newTableOps(s *session) *tOps {
+ var (
+ cacher cache.Cacher
+ bcache *cache.Cache
+ bpool *util.BufferPool
+ )
+ if s.o.GetOpenFilesCacheCapacity() > 0 {
+ cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
+ }
+ if !s.o.GetDisableBlockCache() {
+ var bcacher cache.Cacher
+ if s.o.GetBlockCacheCapacity() > 0 {
+ bcacher = s.o.GetBlockCacher().New(s.o.GetBlockCacheCapacity())
+ }
+ bcache = cache.NewCache(bcacher)
+ }
+ if !s.o.GetDisableBufferPool() {
+ bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
+ }
+ return &tOps{
+ s: s,
+ noSync: s.o.GetNoSync(),
+ evictRemoved: s.o.GetBlockCacheEvictRemoved(),
+ cache: cache.NewCache(cacher),
+ bcache: bcache,
+ bpool: bpool,
+ }
+}
+
+// tWriter wraps the table writer. It keeps track of the file descriptor
+// and the added key range.
+type tWriter struct {
+ t *tOps
+
+ fd storage.FileDesc
+ w storage.Writer
+ tw *table.Writer
+
+ first, last []byte
+}
+
+// Appends a key/value pair to the table.
+func (w *tWriter) append(key, value []byte) error {
+ if w.first == nil {
+ w.first = append([]byte{}, key...)
+ }
+ w.last = append(w.last[:0], key...)
+ return w.tw.Append(key, value)
+}
+
+// Returns true if the table is empty.
+func (w *tWriter) empty() bool {
+ return w.first == nil
+}
+
+// Closes the storage.Writer.
+func (w *tWriter) close() {
+ if w.w != nil {
+ w.w.Close()
+ w.w = nil
+ }
+}
+
+// Finalizes the table and returns table file.
+func (w *tWriter) finish() (f *tFile, err error) {
+ defer w.close()
+ err = w.tw.Close()
+ if err != nil {
+ return
+ }
+ if !w.t.noSync {
+ err = w.w.Sync()
+ if err != nil {
+ return
+ }
+ }
+ f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last))
+ return
+}
+
+// Drops the table.
+func (w *tWriter) drop() {
+ w.close()
+ w.t.s.stor.Remove(w.fd)
+ w.t.s.reuseFileNum(w.fd.Num)
+ w.tw = nil
+ w.first = nil
+ w.last = nil
+}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go
new file mode 100644
index 00000000..496feb6f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go
@@ -0,0 +1,1139 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package table
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "sort"
+ "strings"
+ "sync"
+
+ "github.com/golang/snappy"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/errors"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// Reader errors.
+var (
+ ErrNotFound = errors.ErrNotFound
+ ErrReaderReleased = errors.New("leveldb/table: reader released")
+ ErrIterReleased = errors.New("leveldb/table: iterator released")
+)
+
+// ErrCorrupted describes error due to corruption. This error will be wrapped
+// with errors.ErrCorrupted.
+type ErrCorrupted struct {
+ Pos int64
+ Size int64
+ Kind string
+ Reason string
+}
+
+func (e *ErrCorrupted) Error() string {
+ return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason)
+}
+
+func max(x, y int) int {
+ if x > y {
+ return x
+ }
+ return y
+}
+
+type block struct {
+ bpool *util.BufferPool
+ bh blockHandle
+ data []byte
+ restartsLen int
+ restartsOffset int
+}
+
+func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) {
+ index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
+ offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):]))
+ offset++ // shared is always zero, since this is a restart point
+ v1, n1 := binary.Uvarint(b.data[offset:]) // key length
+ _, n2 := binary.Uvarint(b.data[offset+n1:]) // value length
+ m := offset + n1 + n2
+ return cmp.Compare(b.data[m:m+int(v1)], key) > 0
+ }) + rstart - 1
+ if index < rstart {
+ // The smallest key is greater than the key sought.
+ index = rstart
+ }
+ offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
+ return
+}
+
+func (b *block) restartIndex(rstart, rlimit, offset int) int {
+ return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
+ return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) > offset
+ }) + rstart - 1
+}
+
+func (b *block) restartOffset(index int) int {
+ return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
+}
+
+func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) {
+ if offset >= b.restartsOffset {
+ if offset != b.restartsOffset {
+ err = &ErrCorrupted{Reason: "entries offset not aligned"}
+ }
+ return
+ }
+ v0, n0 := binary.Uvarint(b.data[offset:]) // Shared prefix length
+ v1, n1 := binary.Uvarint(b.data[offset+n0:]) // Key length
+ v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length
+ m := n0 + n1 + n2
+ n = m + int(v1) + int(v2)
+ if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset {
+ err = &ErrCorrupted{Reason: "entries corrupted"}
+ return
+ }
+ key = b.data[offset+m : offset+m+int(v1)]
+ value = b.data[offset+m+int(v1) : offset+n]
+ nShared = int(v0)
+ return
+}
+
+func (b *block) Release() {
+ b.bpool.Put(b.data)
+ b.bpool = nil
+ b.data = nil
+}
+
+type dir int
+
+const (
+ dirReleased dir = iota - 1
+ dirSOI
+ dirEOI
+ dirBackward
+ dirForward
+)
+
+type blockIter struct {
+ tr *Reader
+ block *block
+ blockReleaser util.Releaser
+ releaser util.Releaser
+ key, value []byte
+ offset int
+ // Previous offset, only filled by Next.
+ prevOffset int
+ prevNode []int
+ prevKeys []byte
+ restartIndex int
+ // Iterator direction.
+ dir dir
+ // Restart index slice range.
+ riStart int
+ riLimit int
+ // Offset slice range.
+ offsetStart int
+ offsetRealStart int
+ offsetLimit int
+ // Error.
+ err error
+}
+
+func (i *blockIter) sErr(err error) {
+ i.err = err
+ i.key = nil
+ i.value = nil
+ i.prevNode = nil
+ i.prevKeys = nil
+}
+
+func (i *blockIter) reset() {
+ if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ i.restartIndex = i.riStart
+ i.offset = i.offsetStart
+ i.dir = dirSOI
+ i.key = i.key[:0]
+ i.value = nil
+}
+
+func (i *blockIter) isFirst() bool {
+ switch i.dir {
+ case dirForward:
+ return i.prevOffset == i.offsetRealStart
+ case dirBackward:
+ return len(i.prevNode) == 1 && i.restartIndex == i.riStart
+ }
+ return false
+}
+
+func (i *blockIter) isLast() bool {
+ switch i.dir {
+ case dirForward, dirBackward:
+ return i.offset == i.offsetLimit
+ }
+ return false
+}
+
+func (i *blockIter) First() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ i.dir = dirSOI
+ return i.Next()
+}
+
+func (i *blockIter) Last() bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ i.dir = dirEOI
+ return i.Prev()
+}
+
+func (i *blockIter) Seek(key []byte) bool {
+ if i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key)
+ if err != nil {
+ i.sErr(err)
+ return false
+ }
+ i.restartIndex = ri
+ i.offset = max(i.offsetStart, offset)
+ if i.dir == dirSOI || i.dir == dirEOI {
+ i.dir = dirForward
+ }
+ for i.Next() {
+ if i.tr.cmp.Compare(i.key, key) >= 0 {
+ return true
+ }
+ }
+ return false
+}
+
+func (i *blockIter) Next() bool {
+ if i.dir == dirEOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ if i.dir == dirSOI {
+ i.restartIndex = i.riStart
+ i.offset = i.offsetStart
+ } else if i.dir == dirBackward {
+ i.prevNode = i.prevNode[:0]
+ i.prevKeys = i.prevKeys[:0]
+ }
+ for i.offset < i.offsetRealStart {
+ key, value, nShared, n, err := i.block.entry(i.offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if n == 0 {
+ i.dir = dirEOI
+ return false
+ }
+ i.key = append(i.key[:nShared], key...)
+ i.value = value
+ i.offset += n
+ }
+ if i.offset >= i.offsetLimit {
+ i.dir = dirEOI
+ if i.offset != i.offsetLimit {
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
+ }
+ return false
+ }
+ key, value, nShared, n, err := i.block.entry(i.offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if n == 0 {
+ i.dir = dirEOI
+ return false
+ }
+ i.key = append(i.key[:nShared], key...)
+ i.value = value
+ i.prevOffset = i.offset
+ i.offset += n
+ i.dir = dirForward
+ return true
+}
+
+func (i *blockIter) Prev() bool {
+ if i.dir == dirSOI || i.err != nil {
+ return false
+ } else if i.dir == dirReleased {
+ i.err = ErrIterReleased
+ return false
+ }
+
+ var ri int
+ if i.dir == dirForward {
+ // Change direction.
+ i.offset = i.prevOffset
+ if i.offset == i.offsetRealStart {
+ i.dir = dirSOI
+ return false
+ }
+ ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset)
+ i.dir = dirBackward
+ } else if i.dir == dirEOI {
+ // At the end of iterator.
+ i.restartIndex = i.riLimit
+ i.offset = i.offsetLimit
+ if i.offset == i.offsetRealStart {
+ i.dir = dirSOI
+ return false
+ }
+ ri = i.riLimit - 1
+ i.dir = dirBackward
+ } else if len(i.prevNode) == 1 {
+ // This is the end of a restart range.
+ i.offset = i.prevNode[0]
+ i.prevNode = i.prevNode[:0]
+ if i.restartIndex == i.riStart {
+ i.dir = dirSOI
+ return false
+ }
+ i.restartIndex--
+ ri = i.restartIndex
+ } else {
+ // In the middle of restart range, get from cache.
+ n := len(i.prevNode) - 3
+ node := i.prevNode[n:]
+ i.prevNode = i.prevNode[:n]
+ // Get the key.
+ ko := node[0]
+ i.key = append(i.key[:0], i.prevKeys[ko:]...)
+ i.prevKeys = i.prevKeys[:ko]
+ // Get the value.
+ vo := node[1]
+ vl := vo + node[2]
+ i.value = i.block.data[vo:vl]
+ i.offset = vl
+ return true
+ }
+ // Build entries cache.
+ i.key = i.key[:0]
+ i.value = nil
+ offset := i.block.restartOffset(ri)
+ if offset == i.offset {
+ ri--
+ if ri < 0 {
+ i.dir = dirSOI
+ return false
+ }
+ offset = i.block.restartOffset(ri)
+ }
+ i.prevNode = append(i.prevNode, offset)
+ for {
+ key, value, nShared, n, err := i.block.entry(offset)
+ if err != nil {
+ i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
+ return false
+ }
+ if offset >= i.offsetRealStart {
+ if i.value != nil {
+ // Appends 3 variables:
+ // 1. Previous keys offset
+ // 2. Value offset in the data block
+ // 3. Value length
+ i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value))
+ i.prevKeys = append(i.prevKeys, i.key...)
+ }
+ i.value = value
+ }
+ i.key = append(i.key[:nShared], key...)
+ offset += n
+ // Stop if target offset reached.
+ if offset >= i.offset {
+ if offset != i.offset {
+ i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
+ return false
+ }
+
+ break
+ }
+ }
+ i.restartIndex = ri
+ i.offset = offset
+ return true
+}
+
+func (i *blockIter) Key() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.key
+}
+
+func (i *blockIter) Value() []byte {
+ if i.err != nil || i.dir <= dirEOI {
+ return nil
+ }
+ return i.value
+}
+
+func (i *blockIter) Release() {
+ if i.dir != dirReleased {
+ i.tr = nil
+ i.block = nil
+ i.prevNode = nil
+ i.prevKeys = nil
+ i.key = nil
+ i.value = nil
+ i.dir = dirReleased
+ if i.blockReleaser != nil {
+ i.blockReleaser.Release()
+ i.blockReleaser = nil
+ }
+ if i.releaser != nil {
+ i.releaser.Release()
+ i.releaser = nil
+ }
+ }
+}
+
+func (i *blockIter) SetReleaser(releaser util.Releaser) {
+ if i.dir == dirReleased {
+ panic(util.ErrReleased)
+ }
+ if i.releaser != nil && releaser != nil {
+ panic(util.ErrHasReleaser)
+ }
+ i.releaser = releaser
+}
+
+func (i *blockIter) Valid() bool {
+ return i.err == nil && (i.dir == dirBackward || i.dir == dirForward)
+}
+
+func (i *blockIter) Error() error {
+ return i.err
+}
+
+type filterBlock struct {
+ bpool *util.BufferPool
+ data []byte
+ oOffset int
+ baseLg uint
+ filtersNum int
+}
+
+func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool {
+ i := int(offset >> b.baseLg)
+ if i < b.filtersNum {
+ o := b.data[b.oOffset+i*4:]
+ n := int(binary.LittleEndian.Uint32(o))
+ m := int(binary.LittleEndian.Uint32(o[4:]))
+ if n < m && m <= b.oOffset {
+ return filter.Contains(b.data[n:m], key)
+ } else if n == m {
+ return false
+ }
+ }
+ return true
+}
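+
+// For example, with the filterBaseLg of 11 used by the writer (baseLg is read
+// back from the filter block trailer), a data block starting at offset 5000
+// consults filter number 5000>>11 = 2.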
+
+func (b *filterBlock) Release() {
+ b.bpool.Put(b.data)
+ b.bpool = nil
+ b.data = nil
+}
+
+type indexIter struct {
+ *blockIter
+ tr *Reader
+ slice *util.Range
+ // Options
+ fillCache bool
+}
+
+func (i *indexIter) Get() iterator.Iterator {
+ value := i.Value()
+ if value == nil {
+ return nil
+ }
+ dataBH, n := decodeBlockHandle(value)
+ if n == 0 {
+ return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle"))
+ }
+
+ var slice *util.Range
+ if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) {
+ slice = i.slice
+ }
+ return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache)
+}
+
+// Reader is a table reader.
+type Reader struct {
+ mu sync.RWMutex
+ fd storage.FileDesc
+ reader io.ReaderAt
+ cache *cache.NamespaceGetter
+ err error
+ bpool *util.BufferPool
+ // Options
+ o *opt.Options
+ cmp comparer.Comparer
+ filter filter.Filter
+ verifyChecksum bool
+
+ dataEnd int64
+ metaBH, indexBH, filterBH blockHandle
+ indexBlock *block
+ filterBlock *filterBlock
+}
+
+func (r *Reader) blockKind(bh blockHandle) string {
+ switch bh.offset {
+ case r.metaBH.offset:
+ return "meta-block"
+ case r.indexBH.offset:
+ return "index-block"
+ case r.filterBH.offset:
+ if r.filterBH.length > 0 {
+ return "filter-block"
+ }
+ }
+ return "data-block"
+}
+
+func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error {
+ return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}}
+}
+
+func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error {
+ return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason)
+}
+
+func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error {
+ if cerr, ok := err.(*ErrCorrupted); ok {
+ cerr.Pos = int64(bh.offset)
+ cerr.Size = int64(bh.length)
+ cerr.Kind = r.blockKind(bh)
+ return &errors.ErrCorrupted{Fd: r.fd, Err: cerr}
+ }
+ return err
+}
+
+func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) {
+ data := r.bpool.Get(int(bh.length + blockTrailerLen))
+ if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF {
+ return nil, err
+ }
+
+ if verifyChecksum {
+ n := bh.length + 1
+ checksum0 := binary.LittleEndian.Uint32(data[n:])
+ checksum1 := util.NewCRC(data[:n]).Value()
+ if checksum0 != checksum1 {
+ r.bpool.Put(data)
+ return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1))
+ }
+ }
+
+ switch data[bh.length] {
+ case blockTypeNoCompression:
+ data = data[:bh.length]
+ case blockTypeSnappyCompression:
+ decLen, err := snappy.DecodedLen(data[:bh.length])
+ if err != nil {
+ r.bpool.Put(data)
+ return nil, r.newErrCorruptedBH(bh, err.Error())
+ }
+ decData := r.bpool.Get(decLen)
+ decData, err = snappy.Decode(decData, data[:bh.length])
+ r.bpool.Put(data)
+ if err != nil {
+ r.bpool.Put(decData)
+ return nil, r.newErrCorruptedBH(bh, err.Error())
+ }
+ data = decData
+ default:
+ r.bpool.Put(data)
+ return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length]))
+ }
+ return data, nil
+}
+
+func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) {
+ data, err := r.readRawBlock(bh, verifyChecksum)
+ if err != nil {
+ return nil, err
+ }
+ restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
+ b := &block{
+ bpool: r.bpool,
+ bh: bh,
+ data: data,
+ restartsLen: restartsLen,
+ restartsOffset: len(data) - (restartsLen+1)*4,
+ }
+ return b, nil
+}
+
+func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) {
+ if r.cache != nil {
+ var (
+ err error
+ ch *cache.Handle
+ )
+ if fillCache {
+ ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+ var b *block
+ b, err = r.readBlock(bh, verifyChecksum)
+ if err != nil {
+ return 0, nil
+ }
+ return cap(b.data), b
+ })
+ } else {
+ ch = r.cache.Get(bh.offset, nil)
+ }
+ if ch != nil {
+ b, ok := ch.Value().(*block)
+ if !ok {
+ ch.Release()
+ return nil, nil, errors.New("leveldb/table: inconsistent block type")
+ }
+ return b, ch, err
+ } else if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ b, err := r.readBlock(bh, verifyChecksum)
+ return b, b, err
+}
+
+func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) {
+ data, err := r.readRawBlock(bh, true)
+ if err != nil {
+ return nil, err
+ }
+ n := len(data)
+ if n < 5 {
+ return nil, r.newErrCorruptedBH(bh, "too short")
+ }
+ m := n - 5
+ oOffset := int(binary.LittleEndian.Uint32(data[m:]))
+ if oOffset > m {
+ return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset")
+ }
+ b := &filterBlock{
+ bpool: r.bpool,
+ data: data,
+ oOffset: oOffset,
+ baseLg: uint(data[n-1]),
+ filtersNum: (m - oOffset) / 4,
+ }
+ return b, nil
+}
+
+func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) {
+ if r.cache != nil {
+ var (
+ err error
+ ch *cache.Handle
+ )
+ if fillCache {
+ ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
+ var b *filterBlock
+ b, err = r.readFilterBlock(bh)
+ if err != nil {
+ return 0, nil
+ }
+ return cap(b.data), b
+ })
+ } else {
+ ch = r.cache.Get(bh.offset, nil)
+ }
+ if ch != nil {
+ b, ok := ch.Value().(*filterBlock)
+ if !ok {
+ ch.Release()
+ return nil, nil, errors.New("leveldb/table: inconsistent block type")
+ }
+ return b, ch, err
+ } else if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ b, err := r.readFilterBlock(bh)
+ return b, b, err
+}
+
+func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) {
+ if r.indexBlock == nil {
+ return r.readBlockCached(r.indexBH, true, fillCache)
+ }
+ return r.indexBlock, util.NoopReleaser{}, nil
+}
+
+func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) {
+ if r.filterBlock == nil {
+ return r.readFilterBlockCached(r.filterBH, fillCache)
+ }
+ return r.filterBlock, util.NoopReleaser{}, nil
+}
+
+func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter {
+ bi := &blockIter{
+ tr: r,
+ block: b,
+ blockReleaser: bReleaser,
+ // Valid key should never be nil.
+ key: make([]byte, 0),
+ dir: dirSOI,
+ riStart: 0,
+ riLimit: b.restartsLen,
+ offsetStart: 0,
+ offsetRealStart: 0,
+ offsetLimit: b.restartsOffset,
+ }
+ if slice != nil {
+ if slice.Start != nil {
+ if bi.Seek(slice.Start) {
+ bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
+ bi.offsetStart = b.restartOffset(bi.riStart)
+ bi.offsetRealStart = bi.prevOffset
+ } else {
+ bi.riStart = b.restartsLen
+ bi.offsetStart = b.restartsOffset
+ bi.offsetRealStart = b.restartsOffset
+ }
+ }
+ if slice.Limit != nil {
+ if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
+ bi.offsetLimit = bi.prevOffset
+ bi.riLimit = bi.restartIndex + 1
+ }
+ }
+ bi.reset()
+ if bi.offsetStart > bi.offsetLimit {
+ bi.sErr(errors.New("leveldb/table: invalid slice range"))
+ }
+ }
+ return bi
+}
+
+func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+ b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ return r.newBlockIter(b, rel, slice, false)
+}
+
+func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ return iterator.NewEmptyIterator(r.err)
+ }
+
+ return r.getDataIter(dataBH, slice, verifyChecksum, fillCache)
+}
+
+// NewIterator creates an iterator from the table.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// table, and a nil Range.Limit is treated as a key after all keys in
+// the table.
+//
+// WARNING: The content of any slice returned by the iterator (e.g. a slice
+// returned by the Iterator.Key() or Iterator.Value() methods) should not be
+// modified unless noted otherwise.
+//
+// The returned iterator is not safe for concurrent use and should be released
+// after use.
+//
+// Also read Iterator documentation of the leveldb/iterator package.
+func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ return iterator.NewEmptyIterator(r.err)
+ }
+
+ fillCache := !ro.GetDontFillCache()
+ indexBlock, rel, err := r.getIndexBlock(fillCache)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ index := &indexIter{
+ blockIter: r.newBlockIter(indexBlock, rel, slice, true),
+ tr: r,
+ slice: slice,
+ fillCache: !ro.GetDontFillCache(),
+ }
+ return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader))
+}
+
+func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ err = r.err
+ return
+ }
+
+ indexBlock, rel, err := r.getIndexBlock(true)
+ if err != nil {
+ return
+ }
+ defer rel.Release()
+
+ index := r.newBlockIter(indexBlock, nil, nil, true)
+ defer index.Release()
+
+ if !index.Seek(key) {
+ if err = index.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+
+ dataBH, n := decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return nil, nil, r.err
+ }
+
+ // The filter should only be used for exact matches.
+ if filtered && r.filter != nil {
+ filterBlock, frel, ferr := r.getFilterBlock(true)
+ if ferr == nil {
+ if !filterBlock.contains(r.filter, dataBH.offset, key) {
+ frel.Release()
+ return nil, nil, ErrNotFound
+ }
+ frel.Release()
+ } else if !errors.IsCorrupted(ferr) {
+ return nil, nil, ferr
+ }
+ }
+
+ data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
+ if !data.Seek(key) {
+ data.Release()
+ if err = data.Error(); err != nil {
+ return
+ }
+
+ // The nearest greater-than key is the first key of the next block.
+ if !index.Next() {
+ if err = index.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+
+ dataBH, n = decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return nil, nil, r.err
+ }
+
+ data = r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
+ if !data.Next() {
+ data.Release()
+ if err = data.Error(); err == nil {
+ err = ErrNotFound
+ }
+ return
+ }
+ }
+
+ // Key doesn't use block buffer, no need to copy the buffer.
+ rkey = data.Key()
+ if !noValue {
+ if r.bpool == nil {
+ value = data.Value()
+ } else {
+ // Value does use block buffer, and since the buffer will be
+ // recycled, it needs to be copied.
+ value = append([]byte{}, data.Value()...)
+ }
+ }
+ data.Release()
+ return
+}
+
+// Find finds key/value pair whose key is greater than or equal to the
+// given key. It returns ErrNotFound if the table doesn't contain
+// such pair.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and will immediately return ErrNotFound if
+// 'filter data' indicates that such pair doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Find returns.
+func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) {
+ return r.find(key, filtered, ro, false)
+}
+
+// FindKey finds key that is greater than or equal to the given key.
+// It returns ErrNotFound if the table doesn't contain such key.
+// If filtered is true then the nearest 'block' will be checked against
+// 'filter data' (if present) and will immediately return ErrNotFound if
+// 'filter data' indicates that such key doesn't exist.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after FindKey returns.
+func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) {
+ rkey, _, err = r.find(key, filtered, ro, true)
+ return
+}
+
+// Get gets the value for the given key. It returns errors.ErrNotFound
+// if the table does not contain the key.
+//
+// The caller may modify the contents of the returned slice as it is its
+// own copy.
+// It is safe to modify the contents of the argument after Get returns.
+func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ err = r.err
+ return
+ }
+
+ rkey, value, err := r.find(key, false, ro, false)
+ if err == nil && r.cmp.Compare(rkey, key) != 0 {
+ value = nil
+ err = ErrNotFound
+ }
+ return
+}
+
+// OffsetOf returns approximate offset for the given key.
+//
+// It is safe to modify the contents of the argument after OffsetOf returns.
+func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
+ r.mu.RLock()
+ defer r.mu.RUnlock()
+
+ if r.err != nil {
+ err = r.err
+ return
+ }
+
+ indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true)
+ if err != nil {
+ return
+ }
+ defer rel.Release()
+
+ index := r.newBlockIter(indexBlock, nil, nil, true)
+ defer index.Release()
+ if index.Seek(key) {
+ dataBH, n := decodeBlockHandle(index.Value())
+ if n == 0 {
+ r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
+ return
+ }
+ offset = int64(dataBH.offset)
+ return
+ }
+ err = index.Error()
+ if err == nil {
+ offset = r.dataEnd
+ }
+ return
+}
+
+// Release implements util.Releaser.
+// It also closes the file if it is an io.Closer.
+func (r *Reader) Release() {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ if closer, ok := r.reader.(io.Closer); ok {
+ closer.Close()
+ }
+ if r.indexBlock != nil {
+ r.indexBlock.Release()
+ r.indexBlock = nil
+ }
+ if r.filterBlock != nil {
+ r.filterBlock.Release()
+ r.filterBlock = nil
+ }
+ r.reader = nil
+ r.cache = nil
+ r.bpool = nil
+ r.err = ErrReaderReleased
+}
+
+// NewReader creates a new initialized table reader for the file.
+// The cache and bpool are optional and can be nil.
+//
+// The returned table reader instance is safe for concurrent use.
+func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
+ if f == nil {
+ return nil, errors.New("leveldb/table: nil file")
+ }
+
+ r := &Reader{
+ fd: fd,
+ reader: f,
+ cache: cache,
+ bpool: bpool,
+ o: o,
+ cmp: o.GetComparer(),
+ verifyChecksum: o.GetStrict(opt.StrictBlockChecksum),
+ }
+
+ if size < footerLen {
+ r.err = r.newErrCorrupted(0, size, "table", "too small")
+ return r, nil
+ }
+
+ footerPos := size - footerLen
+ var footer [footerLen]byte
+ if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF {
+ return nil, err
+ }
+ if string(footer[footerLen-len(magic):footerLen]) != magic {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number")
+ return r, nil
+ }
+
+ var n int
+ // Decode the metaindex block handle.
+ r.metaBH, n = decodeBlockHandle(footer[:])
+ if n == 0 {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle")
+ return r, nil
+ }
+
+ // Decode the index block handle.
+ r.indexBH, n = decodeBlockHandle(footer[n:])
+ if n == 0 {
+ r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle")
+ return r, nil
+ }
+
+ // Read metaindex block.
+ metaBlock, err := r.readBlock(r.metaBH, true)
+ if err != nil {
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ }
+ return nil, err
+ }
+
+ // Set data end.
+ r.dataEnd = int64(r.metaBH.offset)
+
+ // Read metaindex.
+ metaIter := r.newBlockIter(metaBlock, nil, nil, true)
+ for metaIter.Next() {
+ key := string(metaIter.Key())
+ if !strings.HasPrefix(key, "filter.") {
+ continue
+ }
+ fn := key[7:]
+ if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn {
+ r.filter = f0
+ } else {
+ for _, f0 := range o.GetAltFilters() {
+ if f0.Name() == fn {
+ r.filter = f0
+ break
+ }
+ }
+ }
+ if r.filter != nil {
+ filterBH, n := decodeBlockHandle(metaIter.Value())
+ if n == 0 {
+ continue
+ }
+ r.filterBH = filterBH
+ // Update data end.
+ r.dataEnd = int64(filterBH.offset)
+ break
+ }
+ }
+ metaIter.Release()
+ metaBlock.Release()
+
+ // Cache index and filter block locally, since we don't have global cache.
+ if cache == nil {
+ r.indexBlock, err = r.readBlock(r.indexBH, true)
+ if err != nil {
+ if errors.IsCorrupted(err) {
+ r.err = err
+ return r, nil
+ }
+ return nil, err
+ }
+ if r.filter != nil {
+ r.filterBlock, err = r.readFilterBlock(r.filterBH)
+ if err != nil {
+ if !errors.IsCorrupted(err) {
+ return nil, err
+ }
+
+ // Don't use filter then.
+ r.filter = nil
+ }
+ }
+ }
+
+ return r, nil
+}
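+
+// exampleReaderLookup is an illustrative sketch (not used by the library) of
+// how a caller might open a table with NewReader and look up a single key,
+// assuming default options and no shared block cache or buffer pool.
+func exampleReaderLookup(f io.ReaderAt, size int64, key []byte) ([]byte, error) {
+ // The file descriptor is only used for error reporting here.
+ fd := storage.FileDesc{Type: storage.TypeTable, Num: 1}
+ r, err := NewReader(f, size, fd, nil, nil, &opt.Options{})
+ if err != nil {
+ return nil, err
+ }
+ // Release also closes f if it implements io.Closer.
+ defer r.Release()
+
+ // Get returns ErrNotFound if the exact key is not present in the table.
+ return r.Get(key, nil)
+}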
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
new file mode 100644
index 00000000..beacdc1f
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
@@ -0,0 +1,177 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package table allows reading and writing sorted key/value pairs.
+package table
+
+import (
+ "encoding/binary"
+)
+
+/*
+Table:
+
+A table consists of one or more data blocks, an optional filter block,
+a metaindex block, an index block and a table footer. The metaindex block
+is a special block used to keep parameters of the table, such as the filter
+block name and its block handle. The index block is a special block used to
+keep a record of the data blocks' offsets and lengths; the index block uses
+one as its restart interval. The keys used by the index block are the last
+key of the preceding block, a shortened separator of adjacent blocks or a
+shortened successor of the last key of the last block. The filter block is
+an optional block that contains a sequence of filter data generated by a
+filter generator.
+
+Table data structure:
+ + optional
+ /
+ +--------------+--------------+--------------+------+-------+-----------------+-------------+--------+
+ | data block 1 | ... | data block n | filter block | metaindex block | index block | footer |
+ +--------------+--------------+--------------+--------------+-----------------+-------------+--------+
+
+ Each block is followed by a 5-byte trailer that contains the compression type and checksum.
+
+Table block trailer:
+
+ +---------------------------+-------------------+
+ | compression type (1-byte) | checksum (4-byte) |
+ +---------------------------+-------------------+
+
+ The checksum is a CRC-32 computed using Castagnoli's polynomial. The compression
+ type is also included in the checksum.
+
+Table footer:
+
+ +------------------- 40-bytes -------------------+
+ / \
+ +------------------------+--------------------+------+-----------------+
+ | metaindex block handle / index block handle / ---- | magic (8-bytes) |
+ +------------------------+--------------------+------+-----------------+
+
+ The magic is the first 64 bits of the SHA-1 sum of "http://code.google.com/p/leveldb/".
+
+NOTE: All fixed-length integers are little-endian.
+*/
+
+/*
+Block:
+
+A block consists of one or more key/value entries and a block trailer.
+A block entry shares a key prefix with its preceding key until a restart
+point is reached. A block should contain at least one restart point.
+The first restart point is always zero.
+
+Block data structure:
+
+ + restart point + restart point (depends on restart interval)
+ / /
+ +---------------+---------------+---------------+---------------+---------+
+ | block entry 1 | block entry 2 | ... | block entry n | trailer |
+ +---------------+---------------+---------------+---------------+---------+
+
+Key/value entry:
+
+ +---- key len ----+
+ / \
+ +-------+---------+-----------+---------+--------------------+--------------+----------------+
+ | shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) |
+ +-----------------+---------------------+--------------------+--------------+----------------+
+
+ A block entry shares a key prefix with its preceding key:
+ Conditions:
+ restart_interval=2
+ entry one : key=deck,value=v1
+ entry two : key=dock,value=v2
+ entry three: key=duck,value=v3
+ The entries will be encoded as follows:
+
+ + restart point (offset=0) + restart point (offset=16)
+ / /
+ +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
+ | 0 | 4 | 2 | "deck" | "v1" | 1 | 3 | 2 | "ock" | "v2" | 0 | 4 | 2 | "duck" | "v3" |
+ +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
+ \ / \ / \ /
+ +----------- entry one -----------+ +----------- entry two ----------+ +---------- entry three ----------+
+
+ The block trailer will contain two restart points:
+
+ +------------+-----------+--------+
+ | 0 | 16 | 2 |
+ +------------+-----------+---+----+
+ \ / \
+ +-- restart points --+ + restart points length
+
+Block trailer:
+
+ +-- 4-bytes --+
+ / \
+ +-----------------+-----------------+-----------------+------------------------------+
+ | restart point 1 | .... | restart point n | restart points len (4-bytes) |
+ +-----------------+-----------------+-----------------+------------------------------+
+
+
+NOTE: All fixed-length integers are little-endian.
+*/
+
+/*
+Filter block:
+
+A filter block consists of one or more filter data entries and a filter block
+trailer. The trailer contains the filter data offsets, a trailer offset and a
+1-byte base Lg.
+
+Filter block data structure:
+
+ + offset 1 + offset 2 + offset n + trailer offset
+ / / / /
+ +---------------+---------------+---------------+---------+
+ | filter data 1 | ... | filter data n | trailer |
+ +---------------+---------------+---------------+---------+
+
+Filter block trailer:
+
+ +- 4-bytes -+
+ / \
+ +---------------+---------------+---------------+-------------------------------+------------------+
+ | data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) |
+ +---------------+---------------+---------------+-------------------------------+------------------+
+
+
+NOTE: All fixed-length integers are little-endian.
+*/
+
+const (
+ blockTrailerLen = 5
+ footerLen = 48
+
+ magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb"
+
+ // The block type gives the per-block compression format.
+ // These constants are part of the file format and should not be changed.
+ blockTypeNoCompression = 0
+ blockTypeSnappyCompression = 1
+
+ // Generate new filter every 2KB of data
+ filterBaseLg = 11
+ filterBase = 1 << filterBaseLg
+)
+
+type blockHandle struct {
+ offset, length uint64
+}
+
+func decodeBlockHandle(src []byte) (blockHandle, int) {
+ offset, n := binary.Uvarint(src)
+ length, m := binary.Uvarint(src[n:])
+ if n == 0 || m == 0 {
+ return blockHandle{}, 0
+ }
+ return blockHandle{offset, length}, n + m
+}
+
+func encodeBlockHandle(dst []byte, b blockHandle) int {
+ n := binary.PutUvarint(dst, b.offset)
+ m := binary.PutUvarint(dst[n:], b.length)
+ return n + m
+}
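+
+// Worked example: blockHandle{offset: 300, length: 1000} encodes to the four
+// bytes 0xAC 0x02 0xE8 0x07 (uvarint(300) followed by uvarint(1000)), and
+// decodeBlockHandle of those four bytes returns the same handle with n == 4.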
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go
new file mode 100644
index 00000000..b96b271d
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go
@@ -0,0 +1,375 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package table
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/golang/snappy"
+
+ "github.com/syndtr/goleveldb/leveldb/comparer"
+ "github.com/syndtr/goleveldb/leveldb/filter"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+func sharedPrefixLen(a, b []byte) int {
+ i, n := 0, len(a)
+ if n > len(b) {
+ n = len(b)
+ }
+ for i < n && a[i] == b[i] {
+ i++
+ }
+ return i
+}
+
+type blockWriter struct {
+ restartInterval int
+ buf util.Buffer
+ nEntries int
+ prevKey []byte
+ restarts []uint32
+ scratch []byte
+}
+
+func (w *blockWriter) append(key, value []byte) {
+ nShared := 0
+ if w.nEntries%w.restartInterval == 0 {
+ w.restarts = append(w.restarts, uint32(w.buf.Len()))
+ } else {
+ nShared = sharedPrefixLen(w.prevKey, key)
+ }
+ n := binary.PutUvarint(w.scratch[0:], uint64(nShared))
+ n += binary.PutUvarint(w.scratch[n:], uint64(len(key)-nShared))
+ n += binary.PutUvarint(w.scratch[n:], uint64(len(value)))
+ w.buf.Write(w.scratch[:n])
+ w.buf.Write(key[nShared:])
+ w.buf.Write(value)
+ w.prevKey = append(w.prevKey[:0], key...)
+ w.nEntries++
+}
+
+func (w *blockWriter) finish() {
+ // Write restarts entry.
+ if w.nEntries == 0 {
+ // Must have at least one restart entry.
+ w.restarts = append(w.restarts, 0)
+ }
+ w.restarts = append(w.restarts, uint32(len(w.restarts)))
+ for _, x := range w.restarts {
+ buf4 := w.buf.Alloc(4)
+ binary.LittleEndian.PutUint32(buf4, x)
+ }
+}
+
+func (w *blockWriter) reset() {
+ w.buf.Reset()
+ w.nEntries = 0
+ w.restarts = w.restarts[:0]
+}
+
+func (w *blockWriter) bytesLen() int {
+ restartsLen := len(w.restarts)
+ if restartsLen == 0 {
+ restartsLen = 1
+ }
+ return w.buf.Len() + 4*restartsLen + 4
+}
+
+type filterWriter struct {
+ generator filter.FilterGenerator
+ buf util.Buffer
+ nKeys int
+ offsets []uint32
+}
+
+func (w *filterWriter) add(key []byte) {
+ if w.generator == nil {
+ return
+ }
+ w.generator.Add(key)
+ w.nKeys++
+}
+
+func (w *filterWriter) flush(offset uint64) {
+ if w.generator == nil {
+ return
+ }
+ for x := int(offset / filterBase); x > len(w.offsets); {
+ w.generate()
+ }
+}
+
+func (w *filterWriter) finish() {
+ if w.generator == nil {
+ return
+ }
+ // Generate a filter for any remaining keys.
+
+ if w.nKeys > 0 {
+ w.generate()
+ }
+ w.offsets = append(w.offsets, uint32(w.buf.Len()))
+ for _, x := range w.offsets {
+ buf4 := w.buf.Alloc(4)
+ binary.LittleEndian.PutUint32(buf4, x)
+ }
+ w.buf.WriteByte(filterBaseLg)
+}
+
+func (w *filterWriter) generate() {
+ // Record offset.
+ w.offsets = append(w.offsets, uint32(w.buf.Len()))
+ // Generate filters.
+ if w.nKeys > 0 {
+ w.generator.Generate(&w.buf)
+ w.nKeys = 0
+ }
+}
+
+// Writer is a table writer.
+type Writer struct {
+ writer io.Writer
+ err error
+ // Options
+ cmp comparer.Comparer
+ filter filter.Filter
+ compression opt.Compression
+ blockSize int
+
+ dataBlock blockWriter
+ indexBlock blockWriter
+ filterBlock filterWriter
+ pendingBH blockHandle
+ offset uint64
+ nEntries int
+ // Scratch is allocated large enough for 5 uvarints. The block writers must
+ // not use the first 20 bytes, since those are used to encode block handles,
+ // which are then passed to the block writer itself.
+ scratch [50]byte
+ comparerScratch []byte
+ compressionScratch []byte
+}
+
+func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) {
+ // Compress the buffer if necessary.
+ var b []byte
+ if compression == opt.SnappyCompression {
+ // Allocate scratch enough for compression and block trailer.
+ if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n {
+ w.compressionScratch = make([]byte, n)
+ }
+ compressed := snappy.Encode(w.compressionScratch, buf.Bytes())
+ n := len(compressed)
+ b = compressed[:n+blockTrailerLen]
+ b[n] = blockTypeSnappyCompression
+ } else {
+ tmp := buf.Alloc(blockTrailerLen)
+ tmp[0] = blockTypeNoCompression
+ b = buf.Bytes()
+ }
+
+ // Calculate the checksum.
+ n := len(b) - 4
+ checksum := util.NewCRC(b[:n]).Value()
+ binary.LittleEndian.PutUint32(b[n:], checksum)
+
+ // Write the buffer to the file.
+ _, err = w.writer.Write(b)
+ if err != nil {
+ return
+ }
+ bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)}
+ w.offset += uint64(len(b))
+ return
+}
+
+func (w *Writer) flushPendingBH(key []byte) {
+ if w.pendingBH.length == 0 {
+ return
+ }
+ var separator []byte
+ if len(key) == 0 {
+ separator = w.cmp.Successor(w.comparerScratch[:0], w.dataBlock.prevKey)
+ } else {
+ separator = w.cmp.Separator(w.comparerScratch[:0], w.dataBlock.prevKey, key)
+ }
+ if separator == nil {
+ separator = w.dataBlock.prevKey
+ } else {
+ w.comparerScratch = separator
+ }
+ n := encodeBlockHandle(w.scratch[:20], w.pendingBH)
+ // Append the block handle to the index block.
+ w.indexBlock.append(separator, w.scratch[:n])
+ // Reset prev key of the data block.
+ w.dataBlock.prevKey = w.dataBlock.prevKey[:0]
+ // Clear pending block handle.
+ w.pendingBH = blockHandle{}
+}
+
+func (w *Writer) finishBlock() error {
+ w.dataBlock.finish()
+ bh, err := w.writeBlock(&w.dataBlock.buf, w.compression)
+ if err != nil {
+ return err
+ }
+ w.pendingBH = bh
+ // Reset the data block.
+ w.dataBlock.reset()
+ // Flush the filter block.
+ w.filterBlock.flush(w.offset)
+ return nil
+}
+
+// Append appends key/value pair to the table. The keys passed must
+// be in increasing order.
+//
+// It is safe to modify the contents of the arguments after Append returns.
+func (w *Writer) Append(key, value []byte) error {
+ if w.err != nil {
+ return w.err
+ }
+ if w.nEntries > 0 && w.cmp.Compare(w.dataBlock.prevKey, key) >= 0 {
+ w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", w.dataBlock.prevKey, key)
+ return w.err
+ }
+
+ w.flushPendingBH(key)
+ // Append key/value pair to the data block.
+ w.dataBlock.append(key, value)
+ // Add key to the filter block.
+ w.filterBlock.add(key)
+
+ // Finish the data block if block size target reached.
+ if w.dataBlock.bytesLen() >= w.blockSize {
+ if err := w.finishBlock(); err != nil {
+ w.err = err
+ return w.err
+ }
+ }
+ w.nEntries++
+ return nil
+}
+
+// BlocksLen returns number of blocks written so far.
+func (w *Writer) BlocksLen() int {
+ n := w.indexBlock.nEntries
+ if w.pendingBH.length > 0 {
+ // Includes the pending block.
+ n++
+ }
+ return n
+}
+
+// EntriesLen returns number of entries added so far.
+func (w *Writer) EntriesLen() int {
+ return w.nEntries
+}
+
+// BytesLen returns number of bytes written so far.
+func (w *Writer) BytesLen() int {
+ return int(w.offset)
+}
+
+// Close will finalize the table. Calling Append is not possible
+// after Close, but calling BlocksLen, EntriesLen and BytesLen
+// is still possible.
+func (w *Writer) Close() error {
+ if w.err != nil {
+ return w.err
+ }
+
+ // Write the last data block, or an empty data block if there
+ // aren't any data blocks at all.
+ if w.dataBlock.nEntries > 0 || w.nEntries == 0 {
+ if err := w.finishBlock(); err != nil {
+ w.err = err
+ return w.err
+ }
+ }
+ w.flushPendingBH(nil)
+
+ // Write the filter block.
+ var filterBH blockHandle
+ w.filterBlock.finish()
+ if buf := &w.filterBlock.buf; buf.Len() > 0 {
+ filterBH, w.err = w.writeBlock(buf, opt.NoCompression)
+ if w.err != nil {
+ return w.err
+ }
+ }
+
+ // Write the metaindex block.
+ if filterBH.length > 0 {
+ key := []byte("filter." + w.filter.Name())
+ n := encodeBlockHandle(w.scratch[:20], filterBH)
+ w.dataBlock.append(key, w.scratch[:n])
+ }
+ w.dataBlock.finish()
+ metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression)
+ if err != nil {
+ w.err = err
+ return w.err
+ }
+
+ // Write the index block.
+ w.indexBlock.finish()
+ indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression)
+ if err != nil {
+ w.err = err
+ return w.err
+ }
+
+ // Write the table footer.
+ footer := w.scratch[:footerLen]
+ for i := range footer {
+ footer[i] = 0
+ }
+ n := encodeBlockHandle(footer, metaindexBH)
+ encodeBlockHandle(footer[n:], indexBH)
+ copy(footer[footerLen-len(magic):], magic)
+ if _, err := w.writer.Write(footer); err != nil {
+ w.err = err
+ return w.err
+ }
+ w.offset += footerLen
+
+ w.err = errors.New("leveldb/table: writer is closed")
+ return nil
+}
+
+// NewWriter creates a new initialized table writer for the file.
+//
+// Table writer is not safe for concurrent use.
+func NewWriter(f io.Writer, o *opt.Options) *Writer {
+ w := &Writer{
+ writer: f,
+ cmp: o.GetComparer(),
+ filter: o.GetFilter(),
+ compression: o.GetCompression(),
+ blockSize: o.GetBlockSize(),
+ comparerScratch: make([]byte, 0),
+ }
+ // data block
+ w.dataBlock.restartInterval = o.GetBlockRestartInterval()
+ // The first 20-bytes are used for encoding block handle.
+ w.dataBlock.scratch = w.scratch[20:]
+ // index block
+ w.indexBlock.restartInterval = 1
+ w.indexBlock.scratch = w.scratch[20:]
+ // filter block
+ if w.filter != nil {
+ w.filterBlock.generator = w.filter.NewGenerator()
+ w.filterBlock.flush(0)
+ }
+ return w
+}
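+
+// exampleWriteTable is an illustrative sketch (not used by the library) of the
+// Writer lifecycle with default options: append keys in strictly increasing
+// order, then Close to write the filter, metaindex and index blocks plus the
+// table footer.
+func exampleWriteTable(dst io.Writer) error {
+ tw := NewWriter(dst, &opt.Options{})
+ if err := tw.Append([]byte("alpha"), []byte("1")); err != nil {
+ return err
+ }
+ if err := tw.Append([]byte("beta"), []byte("2")); err != nil {
+ return err
+ }
+ return tw.Close()
+}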
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util.go
new file mode 100644
index 00000000..0e2b519e
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util.go
@@ -0,0 +1,98 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sort"
+
+ "github.com/syndtr/goleveldb/leveldb/storage"
+)
+
+func shorten(str string) string {
+ if len(str) <= 8 {
+ return str
+ }
+ return str[:3] + ".." + str[len(str)-3:]
+}
+
+var bunits = [...]string{"", "Ki", "Mi", "Gi", "Ti"}
+
+func shortenb(bytes int) string {
+ i := 0
+ for ; bytes > 1024 && i < 4; i++ {
+ bytes /= 1024
+ }
+ return fmt.Sprintf("%d%sB", bytes, bunits[i])
+}
+
+func sshortenb(bytes int) string {
+ if bytes == 0 {
+ return "~"
+ }
+ sign := "+"
+ if bytes < 0 {
+ sign = "-"
+ bytes *= -1
+ }
+ i := 0
+ for ; bytes > 1024 && i < 4; i++ {
+ bytes /= 1024
+ }
+ return fmt.Sprintf("%s%d%sB", sign, bytes, bunits[i])
+}
+
+func sint(x int) string {
+ if x == 0 {
+ return "~"
+ }
+ sign := "+"
+ if x < 0 {
+ sign = "-"
+ x *= -1
+ }
+ return fmt.Sprintf("%s%d", sign, x)
+}
+
+func minInt(a, b int) int {
+ if a < b {
+ return a
+ }
+ return b
+}
+
+func maxInt(a, b int) int {
+ if a > b {
+ return a
+ }
+ return b
+}
+
+type fdSorter []storage.FileDesc
+
+func (p fdSorter) Len() int {
+ return len(p)
+}
+
+func (p fdSorter) Less(i, j int) bool {
+ return p[i].Num < p[j].Num
+}
+
+func (p fdSorter) Swap(i, j int) {
+ p[i], p[j] = p[j], p[i]
+}
+
+func sortFds(fds []storage.FileDesc) {
+ sort.Sort(fdSorter(fds))
+}
+
+func ensureBuffer(b []byte, n int) []byte {
+ if cap(b) < n {
+ return make([]byte, n)
+ }
+ return b[:n]
+}
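+
+// Illustrative examples of the formatting helpers above (values are arbitrary):
+//
+//	shorten("verylongname") // "ver..ame"
+//	shortenb(2048)          // "2KiB"
+//	sshortenb(-4096)        // "-4KiB"
+//	sint(7)                 // "+7"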
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go
new file mode 100644
index 00000000..21de2425
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go
@@ -0,0 +1,293 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+// This is a copy of the Go standard library bytes.Buffer, with some
+// modifications and some features stripped.
+
+import (
+ "bytes"
+ "io"
+)
+
+// A Buffer is a variable-sized buffer of bytes with Read and Write methods.
+// The zero value for Buffer is an empty buffer ready to use.
+type Buffer struct {
+ buf []byte // contents are the bytes buf[off : len(buf)]
+ off int // read at &buf[off], write at &buf[len(buf)]
+ bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation.
+}
+
+// Bytes returns a slice of the contents of the unread portion of the buffer;
+// len(b.Bytes()) == b.Len(). If the caller changes the contents of the
+// returned slice, the contents of the buffer will change provided there
+// are no intervening method calls on the Buffer.
+func (b *Buffer) Bytes() []byte { return b.buf[b.off:] }
+
+// String returns the contents of the unread portion of the buffer
+// as a string. If the Buffer is a nil pointer, it returns "".
+func (b *Buffer) String() string {
+ if b == nil {
+ // Special case, useful in debugging.
+ return ""
+ }
+ return string(b.buf[b.off:])
+}
+
+// Len returns the number of bytes of the unread portion of the buffer;
+// b.Len() == len(b.Bytes()).
+func (b *Buffer) Len() int { return len(b.buf) - b.off }
+
+// Truncate discards all but the first n unread bytes from the buffer.
+// It panics if n is negative or greater than the length of the buffer.
+func (b *Buffer) Truncate(n int) {
+ switch {
+ case n < 0 || n > b.Len():
+ panic("leveldb/util.Buffer: truncation out of range")
+ case n == 0:
+ // Reuse buffer space.
+ b.off = 0
+ }
+ b.buf = b.buf[0 : b.off+n]
+}
+
+// Reset resets the buffer so it has no content.
+// b.Reset() is the same as b.Truncate(0).
+func (b *Buffer) Reset() { b.Truncate(0) }
+
+// grow grows the buffer to guarantee space for n more bytes.
+// It returns the index where bytes should be written.
+// If the buffer can't grow it will panic with bytes.ErrTooLarge.
+func (b *Buffer) grow(n int) int {
+ m := b.Len()
+ // If buffer is empty, reset to recover space.
+ if m == 0 && b.off != 0 {
+ b.Truncate(0)
+ }
+ if len(b.buf)+n > cap(b.buf) {
+ var buf []byte
+ if b.buf == nil && n <= len(b.bootstrap) {
+ buf = b.bootstrap[0:]
+ } else if m+n <= cap(b.buf)/2 {
+ // We can slide things down instead of allocating a new
+ // slice. We only need m+n <= cap(b.buf) to slide, but
+ // we instead let capacity get twice as large so we
+ // don't spend all our time copying.
+ copy(b.buf[:], b.buf[b.off:])
+ buf = b.buf[:m]
+ } else {
+ // not enough space anywhere
+ buf = makeSlice(2*cap(b.buf) + n)
+ copy(buf, b.buf[b.off:])
+ }
+ b.buf = buf
+ b.off = 0
+ }
+ b.buf = b.buf[0 : b.off+m+n]
+ return b.off + m
+}
+
+// Alloc allocates an n-byte slice from the buffer, growing the buffer as
+// needed. If n is negative, Alloc will panic.
+// If the buffer can't grow it will panic with bytes.ErrTooLarge.
+func (b *Buffer) Alloc(n int) []byte {
+ if n < 0 {
+ panic("leveldb/util.Buffer.Alloc: negative count")
+ }
+ m := b.grow(n)
+ return b.buf[m:]
+}
+
+// Grow grows the buffer's capacity, if necessary, to guarantee space for
+// another n bytes. After Grow(n), at least n bytes can be written to the
+// buffer without another allocation.
+// If n is negative, Grow will panic.
+// If the buffer can't grow it will panic with bytes.ErrTooLarge.
+func (b *Buffer) Grow(n int) {
+ if n < 0 {
+ panic("leveldb/util.Buffer.Grow: negative count")
+ }
+ m := b.grow(n)
+ b.buf = b.buf[0:m]
+}
+
+// Write appends the contents of p to the buffer, growing the buffer as
+// needed. The return value n is the length of p; err is always nil. If the
+// buffer becomes too large, Write will panic with bytes.ErrTooLarge.
+func (b *Buffer) Write(p []byte) (n int, err error) {
+ m := b.grow(len(p))
+ return copy(b.buf[m:], p), nil
+}
+
+// MinRead is the minimum slice size passed to a Read call by
+// Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond
+// what is required to hold the contents of r, ReadFrom will not grow the
+// underlying buffer.
+const MinRead = 512
+
+// ReadFrom reads data from r until EOF and appends it to the buffer, growing
+// the buffer as needed. The return value n is the number of bytes read. Any
+// error except io.EOF encountered during the read is also returned. If the
+// buffer becomes too large, ReadFrom will panic with bytes.ErrTooLarge.
+func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) {
+ // If buffer is empty, reset to recover space.
+ if b.off >= len(b.buf) {
+ b.Truncate(0)
+ }
+ for {
+ if free := cap(b.buf) - len(b.buf); free < MinRead {
+ // not enough space at end
+ newBuf := b.buf
+ if b.off+free < MinRead {
+ // not enough space using beginning of buffer;
+ // double buffer capacity
+ newBuf = makeSlice(2*cap(b.buf) + MinRead)
+ }
+ copy(newBuf, b.buf[b.off:])
+ b.buf = newBuf[:len(b.buf)-b.off]
+ b.off = 0
+ }
+ m, e := r.Read(b.buf[len(b.buf):cap(b.buf)])
+ b.buf = b.buf[0 : len(b.buf)+m]
+ n += int64(m)
+ if e == io.EOF {
+ break
+ }
+ if e != nil {
+ return n, e
+ }
+ }
+ return n, nil // err is EOF, so return nil explicitly
+}
+
+// makeSlice allocates a slice of size n. If the allocation fails, it panics
+// with bytes.ErrTooLarge.
+func makeSlice(n int) []byte {
+ // If the make fails, give a known error.
+ defer func() {
+ if recover() != nil {
+ panic(bytes.ErrTooLarge)
+ }
+ }()
+ return make([]byte, n)
+}
+
+// WriteTo writes data to w until the buffer is drained or an error occurs.
+// The return value n is the number of bytes written; it always fits into an
+// int, but it is int64 to match the io.WriterTo interface. Any error
+// encountered during the write is also returned.
+func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) {
+ if b.off < len(b.buf) {
+ nBytes := b.Len()
+ m, e := w.Write(b.buf[b.off:])
+ if m > nBytes {
+ panic("leveldb/util.Buffer.WriteTo: invalid Write count")
+ }
+ b.off += m
+ n = int64(m)
+ if e != nil {
+ return n, e
+ }
+ // all bytes should have been written, by definition of
+ // Write method in io.Writer
+ if m != nBytes {
+ return n, io.ErrShortWrite
+ }
+ }
+ // Buffer is now empty; reset.
+ b.Truncate(0)
+ return
+}
+
+// WriteByte appends the byte c to the buffer, growing the buffer as needed.
+// The returned error is always nil, but is included to match bufio.Writer's
+// WriteByte. If the buffer becomes too large, WriteByte will panic with
+// bytes.ErrTooLarge.
+func (b *Buffer) WriteByte(c byte) error {
+ m := b.grow(1)
+ b.buf[m] = c
+ return nil
+}
+
+// Read reads the next len(p) bytes from the buffer or until the buffer
+// is drained. The return value n is the number of bytes read. If the
+// buffer has no data to return, err is io.EOF (unless len(p) is zero);
+// otherwise it is nil.
+func (b *Buffer) Read(p []byte) (n int, err error) {
+ if b.off >= len(b.buf) {
+ // Buffer is empty, reset to recover space.
+ b.Truncate(0)
+ if len(p) == 0 {
+ return
+ }
+ return 0, io.EOF
+ }
+ n = copy(p, b.buf[b.off:])
+ b.off += n
+ return
+}
+
+// Next returns a slice containing the next n bytes from the buffer,
+// advancing the buffer as if the bytes had been returned by Read.
+// If there are fewer than n bytes in the buffer, Next returns the entire buffer.
+// The slice is only valid until the next call to a read or write method.
+func (b *Buffer) Next(n int) []byte {
+ m := b.Len()
+ if n > m {
+ n = m
+ }
+ data := b.buf[b.off : b.off+n]
+ b.off += n
+ return data
+}
+
+// ReadByte reads and returns the next byte from the buffer.
+// If no byte is available, it returns error io.EOF.
+func (b *Buffer) ReadByte() (c byte, err error) {
+ if b.off >= len(b.buf) {
+ // Buffer is empty, reset to recover space.
+ b.Truncate(0)
+ return 0, io.EOF
+ }
+ c = b.buf[b.off]
+ b.off++
+ return c, nil
+}
+
+// ReadBytes reads until the first occurrence of delim in the input,
+// returning a slice containing the data up to and including the delimiter.
+// If ReadBytes encounters an error before finding a delimiter,
+// it returns the data read before the error and the error itself (often io.EOF).
+// ReadBytes returns err != nil if and only if the returned data does not end in
+// delim.
+func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) {
+ slice, err := b.readSlice(delim)
+ // return a copy of slice. The buffer's backing array may
+ // be overwritten by later calls.
+ line = append(line, slice...)
+ return
+}
+
+// readSlice is like ReadBytes but returns a reference to internal buffer data.
+func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
+ i := bytes.IndexByte(b.buf[b.off:], delim)
+ end := b.off + i + 1
+ if i < 0 {
+ end = len(b.buf)
+ err = io.EOF
+ }
+ line = b.buf[b.off:end]
+ b.off = end
+ return line, err
+}
+
+// NewBuffer creates and initializes a new Buffer using buf as its initial
+// contents. It is intended to prepare a Buffer to read existing data. It
+// can also be used to size the internal buffer for writing. To do that,
+// buf should have the desired capacity but a length of zero.
+//
+// In most cases, new(Buffer) (or just declaring a Buffer variable) is
+// sufficient to initialize a Buffer.
+func NewBuffer(buf []byte) *Buffer { return &Buffer{buf: buf} }
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
new file mode 100644
index 00000000..2f3db974
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go
@@ -0,0 +1,239 @@
+// Copyright (c) 2014, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+type buffer struct {
+ b []byte
+ miss int
+}
+
+// BufferPool is a 'buffer pool'.
+type BufferPool struct {
+ pool [6]chan []byte
+ size [5]uint32
+ sizeMiss [5]uint32
+ sizeHalf [5]uint32
+ baseline [4]int
+ baseline0 int
+
+ mu sync.RWMutex
+ closed bool
+ closeC chan struct{}
+
+ get uint32
+ put uint32
+ half uint32
+ less uint32
+ equal uint32
+ greater uint32
+ miss uint32
+}
+
+func (p *BufferPool) poolNum(n int) int {
+ if n <= p.baseline0 && n > p.baseline0/2 {
+ return 0
+ }
+ for i, x := range p.baseline {
+ if n <= x {
+ return i + 1
+ }
+ }
+ return len(p.baseline) + 1
+}
+
+// Get returns a buffer with length n.
+func (p *BufferPool) Get(n int) []byte {
+ if p == nil {
+ return make([]byte, n)
+ }
+
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+
+ if p.closed {
+ return make([]byte, n)
+ }
+
+ atomic.AddUint32(&p.get, 1)
+
+ poolNum := p.poolNum(n)
+ pool := p.pool[poolNum]
+ if poolNum == 0 {
+ // Fast path.
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ atomic.AddUint32(&p.half, 1)
+ select {
+ case pool <- b:
+ default:
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ atomic.AddUint32(&p.greater, 1)
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ return make([]byte, n, p.baseline0)
+ } else {
+ sizePtr := &p.size[poolNum-1]
+
+ select {
+ case b := <-pool:
+ switch {
+ case cap(b) > n:
+ if cap(b)-n >= n {
+ atomic.AddUint32(&p.half, 1)
+ sizeHalfPtr := &p.sizeHalf[poolNum-1]
+ if atomic.AddUint32(sizeHalfPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(cap(b)/2))
+ atomic.StoreUint32(sizeHalfPtr, 0)
+ } else {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ return make([]byte, n)
+ } else {
+ atomic.AddUint32(&p.less, 1)
+ return b[:n]
+ }
+ case cap(b) == n:
+ atomic.AddUint32(&p.equal, 1)
+ return b[:n]
+ default:
+ atomic.AddUint32(&p.greater, 1)
+ if uint32(cap(b)) >= atomic.LoadUint32(sizePtr) {
+ select {
+ case pool <- b:
+ default:
+ }
+ }
+ }
+ default:
+ atomic.AddUint32(&p.miss, 1)
+ }
+
+ if size := atomic.LoadUint32(sizePtr); uint32(n) > size {
+ if size == 0 {
+ atomic.CompareAndSwapUint32(sizePtr, 0, uint32(n))
+ } else {
+ sizeMissPtr := &p.sizeMiss[poolNum-1]
+ if atomic.AddUint32(sizeMissPtr, 1) == 20 {
+ atomic.StoreUint32(sizePtr, uint32(n))
+ atomic.StoreUint32(sizeMissPtr, 0)
+ }
+ }
+ return make([]byte, n)
+ } else {
+ return make([]byte, n, size)
+ }
+ }
+}
+
+// Put adds the given buffer to the pool.
+func (p *BufferPool) Put(b []byte) {
+ if p == nil {
+ return
+ }
+
+ p.mu.RLock()
+ defer p.mu.RUnlock()
+
+ if p.closed {
+ return
+ }
+
+ atomic.AddUint32(&p.put, 1)
+
+ pool := p.pool[p.poolNum(cap(b))]
+ select {
+ case pool <- b:
+ default:
+ }
+
+}
+
+func (p *BufferPool) Close() {
+ if p == nil {
+ return
+ }
+
+ p.mu.Lock()
+ if !p.closed {
+ p.closed = true
+ p.closeC <- struct{}{}
+ }
+ p.mu.Unlock()
+}
+
+func (p *BufferPool) String() string {
+ if p == nil {
+ return ""
+ }
+
+ return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}",
+ p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss)
+}
+
+func (p *BufferPool) drain() {
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ticker.C:
+ for _, ch := range p.pool {
+ select {
+ case <-ch:
+ default:
+ }
+ }
+ case <-p.closeC:
+ close(p.closeC)
+ for _, ch := range p.pool {
+ close(ch)
+ }
+ return
+ }
+ }
+}
+
+// NewBufferPool creates a new initialized 'buffer pool'.
+func NewBufferPool(baseline int) *BufferPool {
+ if baseline <= 0 {
+ panic("baseline can't be <= 0")
+ }
+ p := &BufferPool{
+ baseline0: baseline,
+ baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4},
+ closeC: make(chan struct{}, 1),
+ }
+ for i, cap := range []int{2, 2, 4, 4, 2, 1} {
+ p.pool[i] = make(chan []byte, cap)
+ }
+ go p.drain()
+ return p
+}
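+
+// Minimal usage sketch (illustrative only; the baseline size is an arbitrary example):
+//
+//	pool := NewBufferPool(32 * 1024)
+//	buf := pool.Get(1024) // len(buf) == 1024
+//	// ... use buf ...
+//	pool.Put(buf)
+//	pool.Close()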
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go
new file mode 100644
index 00000000..631c9d61
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go
@@ -0,0 +1,30 @@
+// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "hash/crc32"
+)
+
+var table = crc32.MakeTable(crc32.Castagnoli)
+
+// CRC is a CRC-32 checksum computed using Castagnoli's polynomial.
+type CRC uint32
+
+// NewCRC creates a new crc based on the given bytes.
+func NewCRC(b []byte) CRC {
+ return CRC(0).Update(b)
+}
+
+// Update updates the crc with the given bytes.
+func (c CRC) Update(b []byte) CRC {
+ return CRC(crc32.Update(uint32(c), table, b))
+}
+
+// Value returns a masked crc.
+func (c CRC) Value() uint32 {
+ return uint32(c>>15|c<<17) + 0xa282ead8
+}
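+
+// Usage sketch (illustrative only):
+//
+//	c := NewCRC([]byte("block data"))
+//	sum := c.Value() // masked Castagnoli CRC-32 of the input
+//	_ = sum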
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go
new file mode 100644
index 00000000..7f3fa4e2
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go
@@ -0,0 +1,48 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+import (
+ "encoding/binary"
+)
+
+// Hash returns the hash of the given data.
+func Hash(data []byte, seed uint32) uint32 {
+ // Similar to murmur hash
+ const (
+ m = uint32(0xc6a4a793)
+ r = uint32(24)
+ )
+ var (
+ h = seed ^ (uint32(len(data)) * m)
+ i int
+ )
+
+ for n := len(data) - len(data)%4; i < n; i += 4 {
+ h += binary.LittleEndian.Uint32(data[i:])
+ h *= m
+ h ^= (h >> 16)
+ }
+
+ switch len(data) - i {
+ default:
+ panic("not reached")
+ case 3:
+ h += uint32(data[i+2]) << 16
+ fallthrough
+ case 2:
+ h += uint32(data[i+1]) << 8
+ fallthrough
+ case 1:
+ h += uint32(data[i])
+ h *= m
+ h ^= (h >> r)
+ case 0:
+ }
+
+ return h
+}
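+
+// Usage sketch (illustrative only; the seed and bucket count are caller-chosen):
+//
+//	h := Hash([]byte("some key"), 0)
+//	bucket := h % 1024 // e.g. map the hash onto a bucket index
+//	_ = bucket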
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go
new file mode 100644
index 00000000..85159583
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go
@@ -0,0 +1,32 @@
+// Copyright (c) 2014, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package util
+
+// Range is a key range.
+type Range struct {
+ // Start of the key range, included in the range.
+ Start []byte
+
+ // Limit of the key range, not included in the range.
+ Limit []byte
+}
+
+// BytesPrefix returns a key range that satisfies the given prefix.
+// This is only applicable to the standard 'bytes comparer'.
+func BytesPrefix(prefix []byte) *Range {
+ var limit []byte
+ for i := len(prefix) - 1; i >= 0; i-- {
+ c := prefix[i]
+ if c < 0xff {
+ limit = make([]byte, i+1)
+ copy(limit, prefix)
+ limit[i] = c + 1
+ break
+ }
+ }
+ return &Range{prefix, limit}
+}
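+
+// Usage sketch (illustrative only):
+//
+//	r := BytesPrefix([]byte("user-"))
+//	// r.Start == []byte("user-"), r.Limit == []byte("user.")
+//	// i.e. the smallest key that no longer carries the "user-" prefix.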
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go
new file mode 100644
index 00000000..80614afc
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go
@@ -0,0 +1,73 @@
+// Copyright (c) 2013, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Package util provides utilities used throughout leveldb.
+package util
+
+import (
+ "errors"
+)
+
+var (
+ ErrReleased = errors.New("leveldb: resource already released")
+ ErrHasReleaser = errors.New("leveldb: releaser already defined")
+)
+
+// Releaser is the interface that wraps the basic Release method.
+type Releaser interface {
+ // Release releases associated resources. Release should always succeed
+ // and can be called multiple times without causing an error.
+ Release()
+}
+
+// ReleaseSetter is the interface that wraps the basic SetReleaser method.
+type ReleaseSetter interface {
+ // SetReleaser associates the given releaser with the resources. The
+ // releaser will be called once the corresponding resources are released.
+ // Calling SetReleaser with nil will clear the releaser.
+ //
+ // This will panic if a releaser is already present or the corresponding
+ // resource is already released. The releaser should be cleared first
+ // before assigning a new one.
+ SetReleaser(releaser Releaser)
+}
+
+// BasicReleaser provides basic implementation of Releaser and ReleaseSetter.
+type BasicReleaser struct {
+ releaser Releaser
+ released bool
+}
+
+// Released reports whether the Release method has already been called.
+func (r *BasicReleaser) Released() bool {
+ return r.released
+}
+
+// Release implements Releaser.Release.
+func (r *BasicReleaser) Release() {
+ if !r.released {
+ if r.releaser != nil {
+ r.releaser.Release()
+ r.releaser = nil
+ }
+ r.released = true
+ }
+}
+
+// SetReleaser implements ReleaseSetter.SetReleaser.
+func (r *BasicReleaser) SetReleaser(releaser Releaser) {
+ if r.released {
+ panic(ErrReleased)
+ }
+ if r.releaser != nil && releaser != nil {
+ panic(ErrHasReleaser)
+ }
+ r.releaser = releaser
+}
+
+type NoopReleaser struct{}
+
+func (NoopReleaser) Release() {}
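+
+// Usage sketch (illustrative only; rel stands for any Releaser implementation):
+//
+//	var br BasicReleaser
+//	br.SetReleaser(rel) // rel.Release() will run on the first br.Release()
+//	br.Release()        // releases rel and marks br as released
+//	br.Release()        // subsequent calls are no-ops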
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/version.go b/vendor/github.com/syndtr/goleveldb/leveldb/version.go
new file mode 100644
index 00000000..73f272af
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/version.go
@@ -0,0 +1,528 @@
+// Copyright (c) 2012, Suryandaru Triandana
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sync/atomic"
+ "unsafe"
+
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+type tSet struct {
+ level int
+ table *tFile
+}
+
+type version struct {
+ s *session
+
+ levels []tFiles
+
+ // Level that should be compacted next and its compaction score.
+ // Score < 1 means compaction is not strictly needed. These fields
+ // are initialized by computeCompaction()
+ cLevel int
+ cScore float64
+
+ cSeek unsafe.Pointer
+
+ closing bool
+ ref int
+ released bool
+}
+
+func newVersion(s *session) *version {
+ return &version{s: s}
+}
+
+func (v *version) incref() {
+ if v.released {
+ panic("already released")
+ }
+
+ v.ref++
+ if v.ref == 1 {
+ // Incr file ref.
+ for _, tt := range v.levels {
+ for _, t := range tt {
+ v.s.addFileRef(t.fd, 1)
+ }
+ }
+ }
+}
+
+func (v *version) releaseNB() {
+ v.ref--
+ if v.ref > 0 {
+ return
+ } else if v.ref < 0 {
+ panic("negative version ref")
+ }
+
+ for _, tt := range v.levels {
+ for _, t := range tt {
+ if v.s.addFileRef(t.fd, -1) == 0 {
+ v.s.tops.remove(t)
+ }
+ }
+ }
+
+ v.released = true
+}
+
+func (v *version) release() {
+ v.s.vmu.Lock()
+ v.releaseNB()
+ v.s.vmu.Unlock()
+}
+
+func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
+ ukey := ikey.ukey()
+
+ // Aux level.
+ if aux != nil {
+ for _, t := range aux {
+ if t.overlaps(v.s.icmp, ukey, ukey) {
+ if !f(-1, t) {
+ return
+ }
+ }
+ }
+
+ if lf != nil && !lf(-1) {
+ return
+ }
+ }
+
+ // Walk tables level-by-level.
+ for level, tables := range v.levels {
+ if len(tables) == 0 {
+ continue
+ }
+
+ if level == 0 {
+ // Level-0 files may overlap each other. Find all files that
+ // overlap ukey.
+ for _, t := range tables {
+ if t.overlaps(v.s.icmp, ukey, ukey) {
+ if !f(level, t) {
+ return
+ }
+ }
+ }
+ } else {
+ if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) {
+ t := tables[i]
+ if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
+ if !f(level, t) {
+ return
+ }
+ }
+ }
+ }
+
+ if lf != nil && !lf(level) {
+ return
+ }
+ }
+}
+
+func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
+ if v.closing {
+ return nil, false, ErrClosed
+ }
+
+ ukey := ikey.ukey()
+
+ var (
+ tset *tSet
+ tseek bool
+
+ // Level-0.
+ zfound bool
+ zseq uint64
+ zkt keyType
+ zval []byte
+ )
+
+ err = ErrNotFound
+
+ // Since entries never hop across levels, finding a key/value
+ // in a smaller level makes later levels irrelevant.
+ v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
+ if level >= 0 && !tseek {
+ if tset == nil {
+ tset = &tSet{level, t}
+ } else {
+ tseek = true
+ }
+ }
+
+ var (
+ fikey, fval []byte
+ ferr error
+ )
+ if noValue {
+ fikey, ferr = v.s.tops.findKey(t, ikey, ro)
+ } else {
+ fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
+ }
+
+ switch ferr {
+ case nil:
+ case ErrNotFound:
+ return true
+ default:
+ err = ferr
+ return false
+ }
+
+ if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
+ if v.s.icmp.uCompare(ukey, fukey) == 0 {
+ // Levels <= 0 may overlap each other.
+ if level <= 0 {
+ if fseq >= zseq {
+ zfound = true
+ zseq = fseq
+ zkt = fkt
+ zval = fval
+ }
+ } else {
+ switch fkt {
+ case keyTypeVal:
+ value = fval
+ err = nil
+ case keyTypeDel:
+ default:
+ panic("leveldb: invalid internalKey type")
+ }
+ return false
+ }
+ }
+ } else {
+ err = fkerr
+ return false
+ }
+
+ return true
+ }, func(level int) bool {
+ if zfound {
+ switch zkt {
+ case keyTypeVal:
+ value = zval
+ err = nil
+ case keyTypeDel:
+ default:
+ panic("leveldb: invalid internalKey type")
+ }
+ return false
+ }
+
+ return true
+ })
+
+ if tseek && tset.table.consumeSeek() <= 0 {
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
+ }
+
+ return
+}
+
+func (v *version) sampleSeek(ikey internalKey) (tcomp bool) {
+ var tset *tSet
+
+ v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
+ if tset == nil {
+ tset = &tSet{level, t}
+ return true
+ }
+ if tset.table.consumeSeek() <= 0 {
+ tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
+ }
+ return false
+ }, nil)
+
+ return
+}
+
+func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
+ strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
+ for level, tables := range v.levels {
+ if level == 0 {
+ // Merge all level zero files together since they may overlap.
+ for _, t := range tables {
+ its = append(its, v.s.tops.newIterator(t, slice, ro))
+ }
+ } else if len(tables) != 0 {
+ its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
+ }
+ }
+ return
+}
+
+func (v *version) newStaging() *versionStaging {
+ return &versionStaging{base: v}
+}
+
+// Spawn a new version based on this version.
+func (v *version) spawn(r *sessionRecord) *version {
+ staging := v.newStaging()
+ staging.commit(r)
+ return staging.finish()
+}
+
+func (v *version) fillRecord(r *sessionRecord) {
+ for level, tables := range v.levels {
+ for _, t := range tables {
+ r.addTableFile(level, t)
+ }
+ }
+}
+
+func (v *version) tLen(level int) int {
+ if level < len(v.levels) {
+ return len(v.levels[level])
+ }
+ return 0
+}
+
+func (v *version) offsetOf(ikey internalKey) (n int64, err error) {
+ for level, tables := range v.levels {
+ for _, t := range tables {
+ if v.s.icmp.Compare(t.imax, ikey) <= 0 {
+ // Entire file is before "ikey", so just add the file size
+ n += t.size
+ } else if v.s.icmp.Compare(t.imin, ikey) > 0 {
+ // Entire file is after "ikey", so ignore
+ if level > 0 {
+ // Files other than level 0 are sorted by meta->min, so
+ // no further files in this level will contain data for
+ // "ikey".
+ break
+ }
+ } else {
+ // "ikey" falls in the range for this table. Add the
+ // approximate offset of "ikey" within the table.
+ if m, err := v.s.tops.offsetOf(t, ikey); err == nil {
+ n += m
+ } else {
+ return 0, err
+ }
+ }
+ }
+ }
+
+ return
+}
+
+func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) {
+ if maxLevel > 0 {
+ if len(v.levels) == 0 {
+ return maxLevel
+ }
+ if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) {
+ var overlaps tFiles
+ for ; level < maxLevel; level++ {
+ if pLevel := level + 1; pLevel >= len(v.levels) {
+ return maxLevel
+ } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
+ break
+ }
+ if gpLevel := level + 2; gpLevel < len(v.levels) {
+ overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
+ if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
+ break
+ }
+ }
+ }
+ }
+ }
+ return
+}
+
+func (v *version) computeCompaction() {
+ // Precomputed best level for next compaction
+ bestLevel := int(-1)
+ bestScore := float64(-1)
+
+ statFiles := make([]int, len(v.levels))
+ statSizes := make([]string, len(v.levels))
+ statScore := make([]string, len(v.levels))
+ statTotSize := int64(0)
+
+ for level, tables := range v.levels {
+ var score float64
+ size := tables.size()
+ if level == 0 {
+ // We treat level-0 specially by bounding the number of files
+ // instead of number of bytes for two reasons:
+ //
+ // (1) With larger write-buffer sizes, it is nice not to do too
+ // many level-0 compactions.
+ //
+ // (2) The files in level-0 are merged on every read and
+ // therefore we wish to avoid too many files when the individual
+ // file size is small (perhaps because of a small write-buffer
+ // setting, or very high compression ratios, or lots of
+ // overwrites/deletions).
+ score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
+ } else {
+ score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
+ }
+
+ if score > bestScore {
+ bestLevel = level
+ bestScore = score
+ }
+
+ statFiles[level] = len(tables)
+ statSizes[level] = shortenb(int(size))
+ statScore[level] = fmt.Sprintf("%.2f", score)
+ statTotSize += size
+ }
+
+ v.cLevel = bestLevel
+ v.cScore = bestScore
+
+ v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
+}
+
+func (v *version) needCompaction() bool {
+ return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
+}
+
+type tablesScratch struct {
+ added map[int64]atRecord
+ deleted map[int64]struct{}
+}
+
+type versionStaging struct {
+ base *version
+ levels []tablesScratch
+}
+
+func (p *versionStaging) getScratch(level int) *tablesScratch {
+ if level >= len(p.levels) {
+ newLevels := make([]tablesScratch, level+1)
+ copy(newLevels, p.levels)
+ p.levels = newLevels
+ }
+ return &(p.levels[level])
+}
+
+func (p *versionStaging) commit(r *sessionRecord) {
+ // Deleted tables.
+ for _, r := range r.deletedTables {
+ scratch := p.getScratch(r.level)
+ if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 {
+ if scratch.deleted == nil {
+ scratch.deleted = make(map[int64]struct{})
+ }
+ scratch.deleted[r.num] = struct{}{}
+ }
+ if scratch.added != nil {
+ delete(scratch.added, r.num)
+ }
+ }
+
+ // New tables.
+ for _, r := range r.addedTables {
+ scratch := p.getScratch(r.level)
+ if scratch.added == nil {
+ scratch.added = make(map[int64]atRecord)
+ }
+ scratch.added[r.num] = r
+ if scratch.deleted != nil {
+ delete(scratch.deleted, r.num)
+ }
+ }
+}
+
+func (p *versionStaging) finish() *version {
+ // Build new version.
+ nv := newVersion(p.base.s)
+ numLevel := len(p.levels)
+ if len(p.base.levels) > numLevel {
+ numLevel = len(p.base.levels)
+ }
+ nv.levels = make([]tFiles, numLevel)
+ for level := 0; level < numLevel; level++ {
+ var baseTabels tFiles
+ if level < len(p.base.levels) {
+ baseTabels = p.base.levels[level]
+ }
+
+ if level < len(p.levels) {
+ scratch := p.levels[level]
+
+ var nt tFiles
+ // Prealloc list if possible.
+ if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 {
+ nt = make(tFiles, 0, n)
+ }
+
+ // Base tables.
+ for _, t := range baseTabels {
+ if _, ok := scratch.deleted[t.fd.Num]; ok {
+ continue
+ }
+ if _, ok := scratch.added[t.fd.Num]; ok {
+ continue
+ }
+ nt = append(nt, t)
+ }
+
+ // New tables.
+ for _, r := range scratch.added {
+ nt = append(nt, tableFileFromRecord(r))
+ }
+
+ if len(nt) != 0 {
+ // Sort tables.
+ if level == 0 {
+ nt.sortByNum()
+ } else {
+ nt.sortByKey(p.base.s.icmp)
+ }
+
+ nv.levels[level] = nt
+ }
+ } else {
+ nv.levels[level] = baseTabels
+ }
+ }
+
+ // Trim levels.
+ n := len(nv.levels)
+ for ; n > 0 && nv.levels[n-1] == nil; n-- {
+ }
+ nv.levels = nv.levels[:n]
+
+ // Compute compaction score for new version.
+ nv.computeCompaction()
+
+ return nv
+}
+
+type versionReleaser struct {
+ v *version
+ once bool
+}
+
+func (vr *versionReleaser) Release() {
+ v := vr.v
+ v.s.vmu.Lock()
+ if !vr.once {
+ v.releaseNB()
+ vr.once = true
+ }
+ v.s.vmu.Unlock()
+}
diff --git a/vendor/github.com/wvanbergen/kazoo-go/.gitignore b/vendor/github.com/wvanbergen/kazoo-go/.gitignore
new file mode 100644
index 00000000..cc8fabd6
--- /dev/null
+++ b/vendor/github.com/wvanbergen/kazoo-go/.gitignore
@@ -0,0 +1,2 @@
+kazoo.test
+confluent/
diff --git a/vendor/github.com/wvanbergen/kazoo-go/.travis.yml b/vendor/github.com/wvanbergen/kazoo-go/.travis.yml
new file mode 100644
index 00000000..f6328bef
--- /dev/null
+++ b/vendor/github.com/wvanbergen/kazoo-go/.travis.yml
@@ -0,0 +1,31 @@
+language: go
+go:
+- 1.8
+- 1.9
+
+env:
+ global:
+ - ZOOKEEPER_PEERS=localhost:2181
+ - DEBUG=true
+
+install:
+- make dependencies
+
+before_script:
+- make confluent/kafka/start
+- make test/create_kafka_topics
+
+script:
+- make test
+- make vet
+- make errcheck
+- make fmt
+
+matrix:
+ include:
+ - go: tip
+ allow_failures:
+ - go: tip
+ fast_finish: true
+
+sudo: false
diff --git a/vendor/github.com/wvanbergen/kazoo-go/MIT-LICENSE b/vendor/github.com/wvanbergen/kazoo-go/MIT-LICENSE
new file mode 100644
index 00000000..87d42803
--- /dev/null
+++ b/vendor/github.com/wvanbergen/kazoo-go/MIT-LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Willem van Bergen
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/wvanbergen/kazoo-go/Makefile b/vendor/github.com/wvanbergen/kazoo-go/Makefile
new file mode 100644
index 00000000..87ac0652
--- /dev/null
+++ b/vendor/github.com/wvanbergen/kazoo-go/Makefile
@@ -0,0 +1,84 @@
+.PHONY: confluent/kafka/* confluent/zookeeper/* confluent/registry/* confluent/start confluent/stop fmt vet errcheck test test/create_kafka_topics dependencies dependencies/*
+
+
+default: fmt vet errcheck test
+
+
+# Confluent platform tasks
+
+confluent/start: confluent/rest/start
+
+confluent/stop: confluent/rest/stop confluent/registry/stop confluent/kafka/stop confluent/zookeeper/stop
+
+# Download & extract tasks
+
+confluent/confluent.tgz:
+ mkdir -p confluent && wget http://packages.confluent.io/archive/3.0/confluent-3.0.1-2.11.tar.gz -O confluent/confluent.tgz
+
+confluent/EXTRACTED: confluent/confluent.tgz
+ tar xzf confluent/confluent.tgz -C confluent --strip-components 1 && mkdir confluent/logs && touch confluent/EXTRACTED
+ echo "delete.topic.enable=true" >> confluent/etc/kafka/server.properties
+
+# Zookeeper tasks
+
+confluent/zookeeper/start: confluent/EXTRACTED
+ nohup confluent/bin/zookeeper-server-start confluent/etc/kafka/zookeeper.properties 2> confluent/logs/zookeeper.err > confluent/logs/zookeeper.out < /dev/null &
+ while ! nc localhost 2181 </dev/null; do sleep 1; done
+ nohup confluent/bin/kafka-server-start confluent/etc/kafka/server.properties 2> confluent/logs/kafka.err > confluent/logs/kafka.out < /dev/null &
+ while ! nc localhost 9092 </dev/null; do sleep 1; done
+ nohup confluent/bin/schema-registry-start confluent/etc/schema-registry/schema-registry.properties 2> confluent/logs/schema-registry.err > confluent/logs/schema-registry.out < /dev/null &
+ while ! nc localhost 8081 </dev/null; do sleep 1; done
+ nohup confluent/bin/kafka-rest-start confluent/etc/kafka-rest/kafka-rest.properties 2> confluent/logs/kafka-rest.err > confluent/logs/kafka-rest.out < /dev/null &
+ while ! nc localhost 8082 </dev/null; do sleep 1; done
+ if len(instances) > 0 {
+ return ErrRunningInstances
+ }
+
+ return cg.kz.deleteRecursive(fmt.Sprintf("%s/consumers/%s", cg.kz.conf.Chroot, cg.Name))
+}
+
+// Instances returns a list of all running instances inside this consumergroup.
+func (cg *Consumergroup) Instances() (ConsumergroupInstanceList, error) {
+ root := fmt.Sprintf("%s/consumers/%s/ids", cg.kz.conf.Chroot, cg.Name)
+ cgis, _, err := cg.kz.conn.Children(root)
+ if err != nil {
+ if err == zk.ErrNoNode {
+ result := make(ConsumergroupInstanceList, 0)
+ return result, nil
+ }
+ return nil, err
+ }
+
+ result := make(ConsumergroupInstanceList, 0, len(cgis))
+ for _, cgi := range cgis {
+ result = append(result, cg.Instance(cgi))
+ }
+ return result, nil
+}
+
+// WatchInstances returns a ConsumergroupInstanceList, and a channel that will be closed
+// as soon as the instance list changes.
+func (cg *Consumergroup) WatchInstances() (ConsumergroupInstanceList, <-chan zk.Event, error) {
+ node := fmt.Sprintf("%s/consumers/%s/ids", cg.kz.conf.Chroot, cg.Name)
+ cgis, _, c, err := cg.kz.conn.ChildrenW(node)
+ if err != nil {
+ if err != zk.ErrNoNode {
+ return nil, nil, err
+ }
+ if err := cg.kz.mkdirRecursive(node); err != nil {
+ return nil, nil, err
+ }
+ if cgis, _, c, err = cg.kz.conn.ChildrenW(node); err != nil {
+ return nil, nil, err
+ }
+ }
+
+ result := make(ConsumergroupInstanceList, 0, len(cgis))
+ for _, cgi := range cgis {
+ result = append(result, cg.Instance(cgi))
+ }
+
+ return result, c, nil
+}
+
+// NewInstance instantiates a new ConsumergroupInstance inside this consumer group,
+// using a newly generated ID.
+func (cg *Consumergroup) NewInstance() *ConsumergroupInstance {
+ id, err := generateConsumerInstanceID()
+ if err != nil {
+ panic(err)
+ }
+ return cg.Instance(id)
+}
+
+// Instance instantiates a new ConsumergroupInstance inside this consumer group,
+// using an existing ID.
+func (cg *Consumergroup) Instance(id string) *ConsumergroupInstance {
+ return &ConsumergroupInstance{cg: cg, ID: id}
+}
+
+// PartitionOwner returns the ConsumergroupInstance that has claimed the given partition.
+// This can be nil if nobody has claimed it yet.
+func (cg *Consumergroup) PartitionOwner(topic string, partition int32) (*ConsumergroupInstance, error) {
+ node := fmt.Sprintf("%s/consumers/%s/owners/%s/%d", cg.kz.conf.Chroot, cg.Name, topic, partition)
+ val, _, err := cg.kz.conn.Get(node)
+
+ // If the node does not exist, nobody has claimed it.
+ switch err {
+ case nil:
+ return &ConsumergroupInstance{cg: cg, ID: string(val)}, nil
+ case zk.ErrNoNode:
+ return nil, nil
+ default:
+ return nil, err
+ }
+}
+
+// WatchPartitionOwner retrieves which instance currently owns the partition, and sets a
+// Zookeeper watch to be notified of changes. If the partition currently does not have an owner,
+// the function returns nil for every return value. In this case it should be safe to claim
+// the partition for an instance.
+func (cg *Consumergroup) WatchPartitionOwner(topic string, partition int32) (*ConsumergroupInstance, <-chan zk.Event, error) {
+ node := fmt.Sprintf("%s/consumers/%s/owners/%s/%d", cg.kz.conf.Chroot, cg.Name, topic, partition)
+ instanceID, _, changed, err := cg.kz.conn.GetW(node)
+
+ switch err {
+ case nil:
+ return &ConsumergroupInstance{cg: cg, ID: string(instanceID)}, changed, nil
+
+ case zk.ErrNoNode:
+ return nil, nil, nil
+
+ default:
+ return nil, nil, err
+ }
+}
+
+// Registered checks whether the consumergroup instance is registered in Zookeeper.
+func (cgi *ConsumergroupInstance) Registered() (bool, error) {
+ node := fmt.Sprintf("%s/consumers/%s/ids/%s", cgi.cg.kz.conf.Chroot, cgi.cg.Name, cgi.ID)
+ return cgi.cg.kz.exists(node)
+}
+
+// Registration returns the current registration of the consumer group instance.
+func (cgi *ConsumergroupInstance) Registration() (*Registration, error) {
+ node := fmt.Sprintf("%s/consumers/%s/ids/%s", cgi.cg.kz.conf.Chroot, cgi.cg.Name, cgi.ID)
+ val, _, err := cgi.cg.kz.conn.Get(node)
+ if err != nil {
+ if err == zk.ErrNoNode {
+ return nil, ErrInstanceNotRegistered
+ }
+ return nil, err
+ }
+
+ reg := &Registration{}
+ if err := json.Unmarshal(val, reg); err != nil {
+ return nil, err
+ }
+ return reg, nil
+}
+
+// WatchRegistration returns the current registration of the consumer group instance,
+// and a channel that will be closed as soon as the registration changes.
+func (cgi *ConsumergroupInstance) WatchRegistration() (*Registration, <-chan zk.Event, error) {
+ node := fmt.Sprintf("%s/consumers/%s/ids/%s", cgi.cg.kz.conf.Chroot, cgi.cg.Name, cgi.ID)
+ val, _, c, err := cgi.cg.kz.conn.GetW(node)
+ if err != nil {
+ if err == zk.ErrNoNode {
+ return nil, nil, ErrInstanceNotRegistered
+ }
+ return nil, nil, err
+ }
+
+ reg := &Registration{}
+ if err := json.Unmarshal(val, reg); err != nil {
+ return nil, nil, err
+ }
+ return reg, c, nil
+}
+
+// RegisterWithSubscription registers the consumer instance in Zookeeper, with its subscription.
+func (cgi *ConsumergroupInstance) RegisterWithSubscription(subscriptionJSON []byte) error {
+ node := fmt.Sprintf("%s/consumers/%s/ids/%s", cgi.cg.kz.conf.Chroot, cgi.cg.Name, cgi.ID)
+ err := cgi.cg.kz.create(node, subscriptionJSON, true)
+ if err == zk.ErrNodeExists {
+ return ErrInstanceAlreadyRegistered
+ }
+ return err
+}
+
+// Register registers the consumergroup instance in Zookeeper.
+func (cgi *ConsumergroupInstance) Register(topics []string) error {
+ subscriptionJSON, err := cgi.marshalSubscription(topics)
+ if err != nil {
+ return err
+ }
+
+ return cgi.RegisterWithSubscription(subscriptionJSON)
+}
+
+// UpdateRegistration updates a consumer group member registration. If the
+// consumer group member has not been registered yet, then an error is returned.
+func (cgi *ConsumergroupInstance) UpdateRegistration(topics []string) error {
+ subscriptionJSON, err := cgi.marshalSubscription(topics)
+ if err != nil {
+ return err
+ }
+
+ node := fmt.Sprintf("%s/consumers/%s/ids/%s", cgi.cg.kz.conf.Chroot, cgi.cg.Name, cgi.ID)
+ _, stat, err := cgi.cg.kz.conn.Get(node)
+ if err != nil {
+ if err == zk.ErrNoNode {
+ return ErrInstanceNotRegistered
+ }
+ return err
+ }
+
+ _, err = cgi.cg.kz.conn.Set(node, subscriptionJSON, stat.Version)
+ if err == zk.ErrNoNode {
+ return ErrInstanceNotRegistered
+ }
+ return err
+}
+
+// marshalSubscription turns a list of topics into the JSON registration payload.
+func (cgi *ConsumergroupInstance) marshalSubscription(topics []string) ([]byte, error) {
+ subscription := make(map[string]int)
+ for _, topic := range topics {
+ subscription[topic] = 1
+ }
+ data, err := json.Marshal(&Registration{
+ Pattern: RegPatternStatic,
+ Subscription: subscription,
+ Timestamp: time.Now().Unix(),
+ Version: RegDefaultVersion,
+ })
+ if err != nil {
+ return nil, err
+ }
+ return data, nil
+}
+
+// Deregister removes the registration of the instance from zookeeper.
+func (cgi *ConsumergroupInstance) Deregister() error {
+ node := fmt.Sprintf("%s/consumers/%s/ids/%s", cgi.cg.kz.conf.Chroot, cgi.cg.Name, cgi.ID)
+ exists, stat, err := cgi.cg.kz.conn.Exists(node)
+ if err != nil {
+ return err
+ } else if !exists {
+ return ErrInstanceNotRegistered
+ }
+
+ err = cgi.cg.kz.conn.Delete(node, stat.Version)
+ if err == zk.ErrNoNode {
+ return ErrInstanceNotRegistered
+ }
+ return err
+}
+
+// ClaimPartition claims ownership of a topic/partition for a consumer instance within a group. If the
+// partition is already claimed by another running instance, it will return ErrPartitionClaimedByOther.
+func (cgi *ConsumergroupInstance) ClaimPartition(topic string, partition int32) error {
+ root := fmt.Sprintf("%s/consumers/%s/owners/%s", cgi.cg.kz.conf.Chroot, cgi.cg.Name, topic)
+ if err := cgi.cg.kz.mkdirRecursive(root); err != nil {
+ return err
+ }
+
+ // Create an ephemeral node for the partition to claim the partition for this instance
+ node := fmt.Sprintf("%s/%d", root, partition)
+ err := cgi.cg.kz.create(node, []byte(cgi.ID), true)
+ switch err {
+ case zk.ErrNodeExists:
+ data, _, err := cgi.cg.kz.conn.Get(node)
+ if err != nil {
+ return err
+ }
+ if string(data) != cgi.ID {
+ // Return a separate error for this, to allow for implementing a retry mechanism.
+ return ErrPartitionClaimedByOther
+ }
+ return nil
+ default:
+ return err
+ }
+}
+
+// ReleasePartition releases a claim to a partition.
+func (cgi *ConsumergroupInstance) ReleasePartition(topic string, partition int32) error {
+ owner, err := cgi.cg.PartitionOwner(topic, partition)
+ if err != nil {
+ return err
+ }
+ if owner == nil || owner.ID != cgi.ID {
+ return ErrPartitionNotClaimed
+ }
+
+ node := fmt.Sprintf("%s/consumers/%s/owners/%s/%d", cgi.cg.kz.conf.Chroot, cgi.cg.Name, topic, partition)
+ return cgi.cg.kz.conn.Delete(node, 0)
+}
+
+// Topics retrieves the list of topics the consumergroup has claimed ownership of at some point.
+func (cg *Consumergroup) Topics() (TopicList, error) {
+ root := fmt.Sprintf("%s/consumers/%s/owners", cg.kz.conf.Chroot, cg.Name)
+ children, _, err := cg.kz.conn.Children(root)
+ if err != nil {
+ return nil, err
+ }
+
+ result := make(TopicList, 0, len(children))
+ for _, name := range children {
+ result = append(result, cg.kz.Topic(name))
+ }
+ return result, nil
+}
+
+// CommitOffset commits an offset to a group/topic/partition
+func (cg *Consumergroup) CommitOffset(topic string, partition int32, offset int64) error {
+ node := fmt.Sprintf("%s/consumers/%s/offsets/%s/%d", cg.kz.conf.Chroot, cg.Name, topic, partition)
+ data := []byte(fmt.Sprintf("%d", offset))
+
+ _, stat, err := cg.kz.conn.Get(node)
+ switch err {
+ case zk.ErrNoNode: // Create a new node
+ return cg.kz.create(node, data, false)
+
+ case nil: // Update the existing node
+ _, err := cg.kz.conn.Set(node, data, stat.Version)
+ return err
+
+ default:
+ return err
+ }
+}
+
+// FetchOffset retrieves an offset for a group/topic/partition
+func (cg *Consumergroup) FetchOffset(topic string, partition int32) (int64, error) {
+ node := fmt.Sprintf("%s/consumers/%s/offsets/%s/%d", cg.kz.conf.Chroot, cg.Name, topic, partition)
+ val, _, err := cg.kz.conn.Get(node)
+ if err == zk.ErrNoNode {
+ return -1, nil
+ } else if err != nil {
+ return -1, err
+ }
+ return strconv.ParseInt(string(val), 10, 64)
+}
+
+// FetchAllOffsets retrieves all the committed offsets for a group
+func (cg *Consumergroup) FetchAllOffsets() (map[string]map[int32]int64, error) {
+ result := make(map[string]map[int32]int64)
+
+ offsetsNode := fmt.Sprintf("%s/consumers/%s/offsets", cg.kz.conf.Chroot, cg.Name)
+ topics, _, err := cg.kz.conn.Children(offsetsNode)
+ if err == zk.ErrNoNode {
+ return result, nil
+ } else if err != nil {
+ return nil, err
+ }
+
+ for _, topic := range topics {
+ result[topic] = make(map[int32]int64)
+ topicNode := fmt.Sprintf("%s/consumers/%s/offsets/%s", cg.kz.conf.Chroot, cg.Name, topic)
+ partitions, _, err := cg.kz.conn.Children(topicNode)
+ if err != nil {
+ return nil, err
+ }
+
+ for _, partition := range partitions {
+ partitionNode := fmt.Sprintf("%s/consumers/%s/offsets/%s/%s", cg.kz.conf.Chroot, cg.Name, topic, partition)
+ val, _, err := cg.kz.conn.Get(partitionNode)
+ if err != nil {
+ return nil, err
+ }
+
+ partition, err := strconv.ParseInt(partition, 10, 32)
+ if err != nil {
+ return nil, err
+ }
+
+ offset, err := strconv.ParseInt(string(val), 10, 64)
+ if err != nil {
+ return nil, err
+ }
+
+ result[topic][int32(partition)] = offset
+ }
+ }
+
+ return result, nil
+}
+
+func (cg *Consumergroup) ResetOffsets() error {
+ offsetsNode := fmt.Sprintf("%s/consumers/%s/offsets", cg.kz.conf.Chroot, cg.Name)
+ topics, _, err := cg.kz.conn.Children(offsetsNode)
+ if err == zk.ErrNoNode {
+ return nil
+ } else if err != nil {
+ return err
+ }
+
+ for _, topic := range topics {
+ topicNode := fmt.Sprintf("%s/consumers/%s/offsets/%s", cg.kz.conf.Chroot, cg.Name, topic)
+ partitions, stat, err := cg.kz.conn.Children(topicNode)
+ if err != nil {
+ return err
+ }
+
+ for _, partition := range partitions {
+ partitionNode := fmt.Sprintf("%s/consumers/%s/offsets/%s/%s", cg.kz.conf.Chroot, cg.Name, topic, partition)
+ exists, stat, err := cg.kz.conn.Exists(partitionNode)
+ if exists {
+ if err = cg.kz.conn.Delete(partitionNode, stat.Version); err != nil {
+ if err != zk.ErrNoNode {
+ return err
+ }
+ }
+ }
+ }
+
+ if err := cg.kz.conn.Delete(topicNode, stat.Version); err != nil {
+ if err != zk.ErrNoNode {
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+// generateUUID generates a UUIDv4.
+func generateUUID() (string, error) {
+ uuid := make([]byte, 16)
+ n, err := io.ReadFull(rand.Reader, uuid)
+ if n != len(uuid) || err != nil {
+ return "", err
+ }
+ // variant bits; see section 4.1.1
+ uuid[8] = uuid[8]&^0xc0 | 0x80
+ // version 4 (pseudo-random); see section 4.1.3
+ uuid[6] = uuid[6]&^0xf0 | 0x40
+ return fmt.Sprintf("%x-%x-%x-%x-%x", uuid[0:4], uuid[4:6], uuid[6:8], uuid[8:10], uuid[10:]), nil
+}
+
+// generateConsumerInstanceID generates a consumergroup Instance ID
+// that is almost certain to be unique.
+func generateConsumerInstanceID() (string, error) {
+ uuid, err := generateUUID()
+ if err != nil {
+ return "", err
+ }
+
+ hostname, err := os.Hostname()
+ if err != nil {
+ return "", err
+ }
+
+ return fmt.Sprintf("%s:%s", hostname, uuid), nil
+}
+
+// Find returns the consumergroup with the given name if it exists in the list.
+// Otherwise it will return `nil`.
+func (cgl ConsumergroupList) Find(name string) *Consumergroup {
+ for _, cg := range cgl {
+ if cg.Name == name {
+ return cg
+ }
+ }
+ return nil
+}
+
+func (cgl ConsumergroupList) Len() int {
+ return len(cgl)
+}
+
+func (cgl ConsumergroupList) Less(i, j int) bool {
+ return cgl[i].Name < cgl[j].Name
+}
+
+func (cgl ConsumergroupList) Swap(i, j int) {
+ cgl[i], cgl[j] = cgl[j], cgl[i]
+}
+
+// Find returns the consumergroup instance with the given ID if it exists in the list.
+// Otherwise it will return `nil`.
+func (cgil ConsumergroupInstanceList) Find(id string) *ConsumergroupInstance {
+ for _, cgi := range cgil {
+ if cgi.ID == id {
+ return cgi
+ }
+ }
+ return nil
+}
+
+func (cgil ConsumergroupInstanceList) Len() int {
+ return len(cgil)
+}
+
+func (cgil ConsumergroupInstanceList) Less(i, j int) bool {
+ return cgil[i].ID < cgil[j].ID
+}
+
+func (cgil ConsumergroupInstanceList) Swap(i, j int) {
+ cgil[i], cgil[j] = cgil[j], cgil[i]
+}
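+
+// Usage sketch (illustrative only; cg stands for a *Consumergroup obtained from a
+// connected Kazoo client, topic/partition/offset values are examples, and error
+// handling is omitted):
+//
+//	instance := cg.NewInstance()
+//	_ = instance.Register([]string{"events"})
+//	_ = instance.ClaimPartition("events", 0)
+//	_ = cg.CommitOffset("events", 0, 42)
+//	offset, _ := cg.FetchOffset("events", 0) // 42
+//	_ = instance.ReleasePartition("events", 0)
+//	_ = instance.Deregister()
+//	_ = offset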
diff --git a/vendor/github.com/wvanbergen/kazoo-go/kazoo.go b/vendor/github.com/wvanbergen/kazoo-go/kazoo.go
new file mode 100644
index 00000000..4465b451
--- /dev/null
+++ b/vendor/github.com/wvanbergen/kazoo-go/kazoo.go
@@ -0,0 +1,279 @@
+package kazoo
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "path"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/samuel/go-zookeeper/zk"
+)
+
+var (
+ FailedToClaimPartition = errors.New("Failed to claim partition for this consumer instance. Do you have a rogue consumer running?")
+)
+
+// ParseConnectionString parses a zookeeper connection string in the form of
+// host1:2181,host2:2181/chroot and returns the list of servers, and the chroot.
+func ParseConnectionString(zookeeper string) (nodes []string, chroot string) {
+ nodesAndChroot := strings.SplitN(zookeeper, "/", 2)
+ if len(nodesAndChroot) == 2 {
+ chroot = fmt.Sprintf("/%s", nodesAndChroot[1])
+ }
+ nodes = strings.Split(nodesAndChroot[0], ",")
+ return
+}
+
+// BuildConnectionString builds a Zookeeper connection string for a list of nodes.
+// Returns a string like "zk1:2181,zk2:2181,zk3:2181"
+func BuildConnectionString(nodes []string) string {
+ return strings.Join(nodes, ",")
+}
+
+// BuildConnectionStringWithChroot builds a Zookeeper connection string for a list
+// of nodes and a chroot. The chroot should start with "/".
+// Returns a string like "zk1:2181,zk2:2181,zk3:2181/chroot"
+func BuildConnectionStringWithChroot(nodes []string, chroot string) string {
+ return fmt.Sprintf("%s%s", strings.Join(nodes, ","), chroot)
+}
+
+// Kazoo interacts with the Kafka metadata in Zookeeper
+type Kazoo struct {
+ conn *zk.Conn
+ conf *Config
+}
+
+// Config holds configuration values for a Kazoo instance.
+type Config struct {
+ // The chroot the Kafka installation is registered under. Defaults to "".
+ Chroot string
+
+ // The amount of time the Zookeeper client can be disconnected from the Zookeeper cluster
+ // before the cluster will get rid of watches and ephemeral nodes. Defaults to 1 second.
+ Timeout time.Duration
+
+ // Logger
+ Logger zk.Logger
+}
+
+// NewConfig instantiates a new Config struct with sane defaults.
+func NewConfig() *Config {
+ return &Config{
+ Timeout: 1 * time.Second,
+ Logger: zk.DefaultLogger,
+ }
+}
+
+// NewKazoo creates a new connection instance
+func NewKazoo(servers []string, conf *Config) (*Kazoo, error) {
+ if conf == nil {
+ conf = NewConfig()
+ }
+
+ conn, _, err := zk.Connect(
+ servers,
+ conf.Timeout,
+ func(c *zk.Conn) { c.SetLogger(conf.Logger) },
+ )
+
+ if err != nil {
+ return nil, err
+ }
+ return &Kazoo{conn, conf}, nil
+}
+
+// NewKazooFromConnectionString creates a new connection instance
+// based on a Zookeeper connection string that can include a chroot.
+func NewKazooFromConnectionString(connectionString string, conf *Config) (*Kazoo, error) {
+ if conf == nil {
+ conf = NewConfig()
+ }
+
+ nodes, chroot := ParseConnectionString(connectionString)
+ conf.Chroot = chroot
+ return NewKazoo(nodes, conf)
+}
+
+// Brokers returns a map of all the brokers that are part of the
+// Kafka cluster that is registered in Zookeeper.
+func (kz *Kazoo) Brokers() (map[int32]string, error) {
+ root := fmt.Sprintf("%s/brokers/ids", kz.conf.Chroot)
+ children, _, err := kz.conn.Children(root)
+ if err != nil {
+ return nil, err
+ }
+
+ type brokerEntry struct {
+ Host string `json:"host"`
+ Port int `json:"port"`
+ }
+
+ result := make(map[int32]string)
+ for _, child := range children {
+ brokerID, err := strconv.ParseInt(child, 10, 32)
+ if err != nil {
+ return nil, err
+ }
+
+ value, _, err := kz.conn.Get(path.Join(root, child))
+ if err != nil {
+ return nil, err
+ }
+
+ var brokerNode brokerEntry
+ if err := json.Unmarshal(value, &brokerNode); err != nil {
+ return nil, err
+ }
+
+ result[int32(brokerID)] = fmt.Sprintf("%s:%d", brokerNode.Host, brokerNode.Port)
+ }
+
+ return result, nil
+}
+
+// BrokerList returns a slice of broker addresses that can be used to connect to
+// the Kafka cluster, e.g. using `sarama.NewAsyncProducer()`.
+func (kz *Kazoo) BrokerList() ([]string, error) {
+ brokers, err := kz.Brokers()
+ if err != nil {
+ return nil, err
+ }
+
+ result := make([]string, 0, len(brokers))
+ for _, broker := range brokers {
+ result = append(result, broker)
+ }
+
+ return result, nil
+}
+
+// brokerIDList returns a sorted slice of broker IDs that can be used for manipulating topics and partitions.
+func (kz *Kazoo) brokerIDList() ([]int32, error) {
+ brokers, err := kz.Brokers()
+ if err != nil {
+ return nil, err
+ }
+
+ result := make([]int32, 0, len(brokers))
+ for id := range brokers {
+ result = append(result, id)
+ }
+
+ // return a sorted list to match the official Kafka SDKs
+ sort.Sort(int32Slice(result))
+
+ return result, nil
+}
+
+// Controller returns which broker is currently acting as the controller of the Kafka cluster
+func (kz *Kazoo) Controller() (int32, error) {
+ type controllerEntry struct {
+ BrokerID int32 `json:"brokerid"`
+ }
+
+ node := fmt.Sprintf("%s/controller", kz.conf.Chroot)
+ data, _, err := kz.conn.Get(node)
+ if err != nil {
+ return -1, err
+ }
+
+ var controllerNode controllerEntry
+ if err := json.Unmarshal(data, &controllerNode); err != nil {
+ return -1, err
+ }
+
+ return controllerNode.BrokerID, nil
+}
+
+// Close closes the connection with the Zookeeper cluster
+func (kz *Kazoo) Close() error {
+ kz.conn.Close()
+ return nil
+}
+
+////////////////////////////////////////////////////////////////////////
+// Util methods
+////////////////////////////////////////////////////////////////////////
+
+// exists checks the existence of a node
+func (kz *Kazoo) exists(node string) (ok bool, err error) {
+ ok, _, err = kz.conn.Exists(node)
+ return
+}
+
+// deleteRecursive deletes a node and all of its children recursively
+func (kz *Kazoo) deleteRecursive(node string) (err error) {
+ children, stat, err := kz.conn.Children(node)
+ if err == zk.ErrNoNode {
+ return nil
+ } else if err != nil {
+ return
+ }
+
+ for _, child := range children {
+ if err = kz.deleteRecursive(path.Join(node, child)); err != nil {
+ return
+ }
+ }
+
+ return kz.conn.Delete(node, stat.Version)
+}
+
+// mkdirRecursive creates a node and its missing parents recursively
+func (kz *Kazoo) mkdirRecursive(node string) (err error) {
+ parent := path.Dir(node)
+ if parent != "/" {
+ if err = kz.mkdirRecursive(parent); err != nil {
+ return
+ }
+ }
+
+ exists, _, err := kz.conn.Exists(node)
+ if err != nil {
+ return
+ }
+
+ if !exists {
+ _, err = kz.conn.Create(node, nil, 0, zk.WorldACL(zk.PermAll))
+ return
+ }
+
+ return
+}
+
+// create stores a new value at the node. It fails if the node already exists.
+func (kz *Kazoo) create(node string, value []byte, ephemeral bool) (err error) {
+ if err = kz.mkdirRecursive(path.Dir(node)); err != nil {
+ return
+ }
+
+ flags := int32(0)
+ if ephemeral {
+ flags = zk.FlagEphemeral
+ }
+ _, err = kz.conn.Create(node, value, flags, zk.WorldACL(zk.PermAll))
+ return
+}
+
+// createOrUpdate first attempts to update a node. If the node does not exist, it will create it.
+func (kz *Kazoo) createOrUpdate(node string, value []byte, ephemeral bool) (err error) {
+ if _, err = kz.conn.Set(node, value, -1); err == nil {
+ return
+ }
+
+ if err == zk.ErrNoNode {
+ err = kz.create(node, value, ephemeral)
+ }
+ return
+}
+
+// sort interface for int32 slice
+type int32Slice []int32
+
+func (s int32Slice) Len() int { return len(s) }
+func (s int32Slice) Less(i, j int) bool { return s[i] < s[j] }
+func (s int32Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
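
For orientation, a minimal sketch of how the broker helpers above can be used; it assumes kazoo.NewKazoo and kazoo.NewConfig from this package's connection code, which sits outside this hunk.

package main

import (
	"fmt"
	"log"

	"github.com/wvanbergen/kazoo-go"
)

func main() {
	// Connect to Zookeeper (NewKazoo/NewConfig are assumed from the package's connection code).
	kz, err := kazoo.NewKazoo([]string{"localhost:2181"}, kazoo.NewConfig())
	if err != nil {
		log.Fatal(err)
	}
	defer kz.Close()

	// BrokerList returns "host:port" strings that can be fed straight into a sarama producer.
	brokers, err := kz.BrokerList()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("brokers:", brokers)

	// Controller reports the ID of the broker currently acting as cluster controller.
	id, err := kz.Controller()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("controller:", id)
}
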
diff --git a/vendor/github.com/wvanbergen/kazoo-go/topic_admin.go b/vendor/github.com/wvanbergen/kazoo-go/topic_admin.go
new file mode 100644
index 00000000..2911441b
--- /dev/null
+++ b/vendor/github.com/wvanbergen/kazoo-go/topic_admin.go
@@ -0,0 +1,130 @@
+package kazoo
+
+import (
+ "errors"
+ "fmt"
+ "time"
+
+ "github.com/samuel/go-zookeeper/zk"
+)
+
+var (
+ ErrTopicExists = errors.New("Topic already exists")
+ ErrTopicMarkedForDelete = errors.New("Topic is already marked for deletion")
+ ErrDeletionTimedOut = errors.New("Timed out while waiting for a topic to be deleted")
+)
+
+// CreateTopic creates a new Kafka topic with the given partition count, replication factor, and topic-level configuration.
+func (kz *Kazoo) CreateTopic(name string, partitionCount int, replicationFactor int, topicConfig map[string]string) error {
+ topic := kz.Topic(name)
+
+ // The official Kafka SDK checks whether the topic exists, always writes the config
+ // unconditionally, but only writes the partition map if one does not already exist.
+ exists, err := topic.Exists()
+ if err != nil {
+ return err
+ } else if exists {
+ return ErrTopicExists
+ }
+
+ brokerList, err := kz.brokerIDList()
+ if err != nil {
+ return err
+ }
+
+ partitionList, err := topic.generatePartitionAssignments(brokerList, partitionCount, replicationFactor)
+ if err != nil {
+ return err
+ }
+
+ configData, err := topic.marshalConfig(topicConfig)
+ if err != nil {
+ return err
+ }
+
+ partitionData, err := topic.marshalPartitions(partitionList)
+ if err != nil {
+ return err
+ }
+
+ if err = kz.createOrUpdate(topic.configPath(), configData, false); err != nil {
+ return err
+ }
+
+ if err = kz.create(topic.metadataPath(), partitionData, false); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// DeleteTopic marks a kafka topic for deletion. Deleting a topic is asynchronous and
+// DeleteTopic will return before Kafka actually does the deletion.
+func (kz *Kazoo) DeleteTopic(name string) error {
+ node := fmt.Sprintf("%s/admin/delete_topics/%s", kz.conf.Chroot, name)
+
+ exists, err := kz.exists(node)
+ if err != nil {
+ return err
+ }
+ if exists {
+ return ErrTopicMarkedForDelete
+ }
+
+ if err := kz.create(node, nil, false); err != nil {
+ return err
+ }
+ return nil
+}
+
+// DeleteTopicSync marks a kafka topic for deletion and waits until it is deleted
+// before returning.
+func (kz *Kazoo) DeleteTopicSync(name string, timeout time.Duration) error {
+ err := kz.DeleteTopic(name)
+
+ if err != nil {
+ return err
+ }
+
+ topic := kz.Topic(name)
+
+ if exists, err := topic.Exists(); err != nil {
+ return err
+ } else if !exists {
+ return nil
+ }
+
+ changes, err := topic.Watch()
+
+ if err != nil {
+ return err
+ }
+
+ if timeout > 0 {
+
+ timer := time.NewTimer(timeout)
+ defer timer.Stop()
+
+ for {
+ select {
+ case <-timer.C:
+ return ErrDeletionTimedOut
+
+ case c := <-changes:
+ if c.Type == zk.EventNodeDeleted {
+ return nil
+ }
+ }
+ }
+
+ } else {
+ for {
+ select {
+ case c := <-changes:
+ if c.Type == zk.EventNodeDeleted {
+ return nil
+ }
+ }
+ }
+ }
+}
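
A hedged sketch of the topic-admin flow this file enables: create a topic via Zookeeper, then delete it and wait for Kafka to finish the asynchronous deletion. kazoo.NewKazoo is assumed from the package's connection code, and passing nil for the config is assumed to select defaults.

package main

import (
	"log"
	"time"

	"github.com/wvanbergen/kazoo-go"
)

func main() {
	kz, err := kazoo.NewKazoo([]string{"localhost:2181"}, nil) // nil config assumed to mean defaults
	if err != nil {
		log.Fatal(err)
	}
	defer kz.Close()

	// 6 partitions, replication factor 2, plus a topic-level retention override.
	cfg := map[string]string{"retention.ms": "3600000"}
	if err := kz.CreateTopic("example-topic", 6, 2, cfg); err != nil && err != kazoo.ErrTopicExists {
		log.Fatal(err)
	}

	// DeleteTopic only marks the topic; DeleteTopicSync additionally waits, here up to 30s.
	if err := kz.DeleteTopicSync("example-topic", 30*time.Second); err != nil {
		log.Fatal(err)
	}
}
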
diff --git a/vendor/github.com/wvanbergen/kazoo-go/topic_metadata.go b/vendor/github.com/wvanbergen/kazoo-go/topic_metadata.go
new file mode 100644
index 00000000..66a6b0c4
--- /dev/null
+++ b/vendor/github.com/wvanbergen/kazoo-go/topic_metadata.go
@@ -0,0 +1,413 @@
+package kazoo
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "math/rand"
+ "strconv"
+
+ "github.com/samuel/go-zookeeper/zk"
+)
+
+var (
+ ErrInvalidPartitionCount = errors.New("Number of partitions must be larger than 0")
+ ErrInvalidReplicationFactor = errors.New("Replication factor must be between 1 and the number of brokers")
+ ErrInvalidReplicaCount = errors.New("All partitions must have the same number of replicas")
+ ErrReplicaBrokerOverlap = errors.New("All replicas for a partition must be on separate brokers")
+ ErrInvalidBroker = errors.New("Replica assigned to invalid broker")
+ ErrMissingPartitionID = errors.New("Partition ids must be sequential starting from 0")
+ ErrDuplicatePartitionID = errors.New("Each partition must have a unique ID")
+)
+
+// Topic interacts with Kafka's topic metadata in Zookeeper.
+type Topic struct {
+ Name string
+ kz *Kazoo
+}
+
+// TopicList implements sort.Interface for a list of Topic instances.
+type TopicList []*Topic
+
+// Partition interacts with Kafka's partition metadata in Zookeeper.
+type Partition struct {
+ topic *Topic
+ ID int32
+ Replicas []int32
+}
+
+// PartitionList implements sort.Interface for a list of Partition instances.
+type PartitionList []*Partition
+
+// Topics returns a list of all registered Kafka topics.
+func (kz *Kazoo) Topics() (TopicList, error) {
+ root := fmt.Sprintf("%s/brokers/topics", kz.conf.Chroot)
+ children, _, err := kz.conn.Children(root)
+ if err != nil {
+ return nil, err
+ }
+
+ result := make(TopicList, 0, len(children))
+ for _, name := range children {
+ result = append(result, kz.Topic(name))
+ }
+ return result, nil
+}
+
+// WatchTopics returns a list of all registered Kafka topics, and
+// watches that list for changes.
+func (kz *Kazoo) WatchTopics() (TopicList, <-chan zk.Event, error) {
+ root := fmt.Sprintf("%s/brokers/topics", kz.conf.Chroot)
+ children, _, c, err := kz.conn.ChildrenW(root)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ result := make(TopicList, 0, len(children))
+ for _, name := range children {
+ result = append(result, kz.Topic(name))
+ }
+ return result, c, nil
+}
+
+// Topic returns a Topic instance for a given topic name
+func (kz *Kazoo) Topic(topic string) *Topic {
+ return &Topic{Name: topic, kz: kz}
+}
+
+// Exists returns true if the topic exists on the Kafka cluster.
+func (t *Topic) Exists() (bool, error) {
+ return t.kz.exists(t.metadataPath())
+}
+
+// Partitions returns a list of all partitions for the topic.
+func (t *Topic) Partitions() (PartitionList, error) {
+ value, _, err := t.kz.conn.Get(t.metadataPath())
+ if err != nil {
+ return nil, err
+ }
+
+ return t.parsePartitions(value)
+}
+
+// WatchPartitions returns a list of all partitions for the topic, and watches the topic for changes.
+func (t *Topic) WatchPartitions() (PartitionList, <-chan zk.Event, error) {
+ value, _, c, err := t.kz.conn.GetW(t.metadataPath())
+ if err != nil {
+ return nil, nil, err
+ }
+
+ list, err := t.parsePartitions(value)
+ return list, c, err
+}
+
+// Watch watches the topic for changes.
+func (t *Topic) Watch() (<-chan zk.Event, error) {
+ _, _, c, err := t.kz.conn.GetW(t.metadataPath())
+ if err != nil {
+ return nil, err
+ }
+
+ return c, err
+}
+
+type topicMetadata struct {
+ Version int `json:"version"`
+ Partitions map[string][]int32 `json:"partitions"`
+}
+
+func (t *Topic) metadataPath() string {
+ return fmt.Sprintf("%s/brokers/topics/%s", t.kz.conf.Chroot, t.Name)
+}
+
+// parsePartitions parses the JSON representation of the partitions
+// that is stored as data on the topic node in Zookeeper.
+func (t *Topic) parsePartitions(value []byte) (PartitionList, error) {
+ var tm topicMetadata
+ if err := json.Unmarshal(value, &tm); err != nil {
+ return nil, err
+ }
+
+ result := make(PartitionList, len(tm.Partitions))
+ for partitionNumber, replicas := range tm.Partitions {
+ partitionID, err := strconv.ParseInt(partitionNumber, 10, 32)
+ if err != nil {
+ return nil, err
+ }
+
+ replicaIDs := make([]int32, 0, len(replicas))
+ for _, r := range replicas {
+ replicaIDs = append(replicaIDs, int32(r))
+ }
+ result[partitionID] = t.Partition(int32(partitionID), replicaIDs)
+ }
+
+ return result, nil
+}
+
+// marshalPartitions turns a PartitionList into the JSON representation
+// to be stored in Zookeeper.
+func (t *Topic) marshalPartitions(partitions PartitionList) ([]byte, error) {
+ tm := topicMetadata{Version: 1, Partitions: make(map[string][]int32, len(partitions))}
+ for _, part := range partitions {
+ tm.Partitions[fmt.Sprintf("%d", part.ID)] = part.Replicas
+ }
+ return json.Marshal(tm)
+}
+
+// generatePartitionAssignments creates a partition list for a topic. The replicas for each
+// partition are drawn from a fresh random permutation of the available brokers, which
+// guarantees that a partition's replicas land on distinct brokers.
+func (t *Topic) generatePartitionAssignments(brokers []int32, partitionCount int, replicationFactor int) (PartitionList, error) {
+ if partitionCount <= 0 {
+ return nil, ErrInvalidPartitionCount
+ }
+ if replicationFactor <= 0 || len(brokers) < replicationFactor {
+ return nil, ErrInvalidReplicationFactor
+ }
+
+ result := make(PartitionList, partitionCount)
+
+ brokerCount := len(brokers)
+ brokerIdx := rand.Intn(brokerCount)
+
+ for p := 0; p < partitionCount; p++ {
+ partition := &Partition{topic: t, ID: int32(p), Replicas: make([]int32, replicationFactor)}
+
+ brokerIndices := rand.Perm(len(brokers))[0:replicationFactor]
+
+ for r := 0; r < replicationFactor; r++ {
+ partition.Replicas[r] = brokers[brokerIndices[r]]
+ }
+
+ result[p] = partition
+ brokerIdx = (brokerIdx + 1) % brokerCount
+ }
+
+ return result, nil
+}
+
+// validatePartitionAssignments ensures that all partitions are assigned to valid brokers,
+// have the same number of replicas, and each replica is assigned to a unique broker
+func (t *Topic) validatePartitionAssignments(brokers []int32, assignment PartitionList) error {
+ if len(assignment) == 0 {
+ return ErrInvalidPartitionCount
+ }
+
+ // get the first replica count to compare against. Every partition should have the same.
+ var replicaCount int
+ for _, part := range assignment {
+ replicaCount = len(part.Replicas)
+ break
+ }
+ if replicaCount == 0 {
+ return ErrInvalidReplicationFactor
+ }
+
+ // ensure all ids are unique and sequential
+ maxPartitionID := int32(-1)
+ partitionIDmap := make(map[int32]struct{}, len(assignment))
+
+ for _, part := range assignment {
+ if part == nil {
+ continue
+ }
+ if maxPartitionID < part.ID {
+ maxPartitionID = part.ID
+ }
+ partitionIDmap[part.ID] = struct{}{}
+
+ // all partitions require the same replica count
+ if len(part.Replicas) != replicaCount {
+ return ErrInvalidReplicaCount
+ }
+
+ rset := make(map[int32]struct{}, replicaCount)
+ for _, r := range part.Replicas {
+ // replica must be assigned to a valid broker
+ found := false
+ for _, b := range brokers {
+ if r == b {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return ErrInvalidBroker
+ }
+ rset[r] = struct{}{}
+ }
+ // broker assignments for a partition must be unique
+ if len(rset) != replicaCount {
+ return ErrReplicaBrokerOverlap
+ }
+ }
+
+ // ensure all partitions accounted for
+ if int(maxPartitionID) != len(assignment)-1 {
+ return ErrMissingPartitionID
+ }
+
+ // ensure no duplicate ids
+ if len(partitionIDmap) != len(assignment) {
+ return ErrDuplicatePartitionID
+ }
+
+ return nil
+}
+
+// Partition returns a Partition instance for the topic.
+func (t *Topic) Partition(id int32, replicas []int32) *Partition {
+ return &Partition{ID: id, Replicas: replicas, topic: t}
+}
+
+type topicConfig struct {
+ Version int `json:"version"`
+ ConfigMap map[string]string `json:"config"`
+}
+
+// configPath returns the Zookeeper node path for a topic's config.
+func (t *Topic) configPath() string {
+ return fmt.Sprintf("%s/config/topics/%s", t.kz.conf.Chroot, t.Name)
+}
+
+// parseConfig parses the json representation of a topic config
+// and returns the configuration values
+func (t *Topic) parseConfig(data []byte) (map[string]string, error) {
+ var cfg topicConfig
+ if err := json.Unmarshal(data, &cfg); err != nil {
+ return nil, err
+ }
+ return cfg.ConfigMap, nil
+}
+
+// marshalConfig turns a config map into the json representation
+// needed for Zookeeper
+func (t *Topic) marshalConfig(data map[string]string) ([]byte, error) {
+ cfg := topicConfig{Version: 1, ConfigMap: data}
+ if cfg.ConfigMap == nil {
+ cfg.ConfigMap = make(map[string]string)
+ }
+ return json.Marshal(&cfg)
+}
+
+// Config returns topic-level configuration settings as a map.
+func (t *Topic) Config() (map[string]string, error) {
+ value, _, err := t.kz.conn.Get(t.configPath())
+ if err != nil {
+ return nil, err
+ }
+
+ return t.parseConfig(value)
+}
+
+// Topic returns the Topic of this partition.
+func (p *Partition) Topic() *Topic {
+ return p.topic
+}
+
+// Key returns a unique identifier for the partition, using the form "topic/partition".
+func (p *Partition) Key() string {
+ return fmt.Sprintf("%s/%d", p.topic.Name, p.ID)
+}
+
+// PreferredReplica returns the preferred replica for this partition.
+func (p *Partition) PreferredReplica() int32 {
+ if len(p.Replicas) > 0 {
+ return p.Replicas[0]
+ } else {
+ return -1
+ }
+}
+
+// Leader returns the broker ID of the broker that is currently the leader for the partition.
+func (p *Partition) Leader() (int32, error) {
+ if state, err := p.state(); err != nil {
+ return -1, err
+ } else {
+ return state.Leader, nil
+ }
+}
+
+// ISR returns the broker IDs of the current in-sync replica set for the partition
+func (p *Partition) ISR() ([]int32, error) {
+ if state, err := p.state(); err != nil {
+ return nil, err
+ } else {
+ return state.ISR, nil
+ }
+}
+
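+// UnderReplicated returns true if the partition's in-sync replica set is
+// currently smaller than its assigned replica set.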
+func (p *Partition) UnderReplicated() (bool, error) {
+ if state, err := p.state(); err != nil {
+ return false, err
+ } else {
+ return len(state.ISR) < len(p.Replicas), nil
+ }
+}
+
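+// UsesPreferredReplica returns true if the first broker in the partition's
+// in-sync replica set is its preferred (first-assigned) replica.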
+func (p *Partition) UsesPreferredReplica() (bool, error) {
+ if state, err := p.state(); err != nil {
+ return false, err
+ } else {
+ return len(state.ISR) > 0 && state.ISR[0] == p.Replicas[0], nil
+ }
+}
+
+// partitionState represents the partition state as it is stored as JSON
+// in Zookeeper on the partition's state node.
+type partitionState struct {
+ Leader int32 `json:"leader"`
+ ISR []int32 `json:"isr"`
+}
+
+// state retrieves and parses the partition State
+func (p *Partition) state() (partitionState, error) {
+ var state partitionState
+ node := fmt.Sprintf("%s/brokers/topics/%s/partitions/%d/state", p.topic.kz.conf.Chroot, p.topic.Name, p.ID)
+ value, _, err := p.topic.kz.conn.Get(node)
+ if err != nil {
+ return state, err
+ }
+
+ if err := json.Unmarshal(value, &state); err != nil {
+ return state, err
+ }
+
+ return state, nil
+}
+
+// Find returns the topic with the given name if it exists in the topic list,
+// and will return `nil` otherwise.
+func (tl TopicList) Find(name string) *Topic {
+ for _, topic := range tl {
+ if topic.Name == name {
+ return topic
+ }
+ }
+ return nil
+}
+
+func (tl TopicList) Len() int {
+ return len(tl)
+}
+
+func (tl TopicList) Less(i, j int) bool {
+ return tl[i].Name < tl[j].Name
+}
+
+func (tl TopicList) Swap(i, j int) {
+ tl[i], tl[j] = tl[j], tl[i]
+}
+
+func (pl PartitionList) Len() int {
+ return len(pl)
+}
+
+func (pl PartitionList) Less(i, j int) bool {
+ return pl[i].topic.Name < pl[j].topic.Name || (pl[i].topic.Name == pl[j].topic.Name && pl[i].ID < pl[j].ID)
+}
+
+func (pl PartitionList) Swap(i, j int) {
+ pl[i], pl[j] = pl[j], pl[i]
+}
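
A sketch of how the metadata accessors in topic_metadata.go compose, e.g. to flag under-replicated partitions; kazoo.NewKazoo and kazoo.NewConfig are again assumed from the package's connection code.

package main

import (
	"fmt"
	"log"

	"github.com/wvanbergen/kazoo-go"
)

func main() {
	kz, err := kazoo.NewKazoo([]string{"localhost:2181"}, kazoo.NewConfig())
	if err != nil {
		log.Fatal(err)
	}
	defer kz.Close()

	topics, err := kz.Topics()
	if err != nil {
		log.Fatal(err)
	}

	for _, topic := range topics {
		partitions, err := topic.Partitions()
		if err != nil {
			log.Fatal(err)
		}
		for _, p := range partitions {
			leader, err := p.Leader()
			if err != nil {
				log.Fatal(err)
			}
			under, err := p.UnderReplicated()
			if err != nil {
				log.Fatal(err)
			}
			// Key() yields "topic/partition", e.g. "example-topic/0".
			fmt.Printf("%s leader=%d under-replicated=%v\n", p.Key(), leader, under)
		}
	}
}
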
diff --git a/vendor/golang.org/x/sync/AUTHORS b/vendor/golang.org/x/sync/AUTHORS
new file mode 100644
index 00000000..15167cd7
--- /dev/null
+++ b/vendor/golang.org/x/sync/AUTHORS
@@ -0,0 +1,3 @@
+# This source code refers to The Go Authors for copyright purposes.
+# The master list of authors is in the main Go distribution,
+# visible at http://tip.golang.org/AUTHORS.
diff --git a/vendor/golang.org/x/sync/CONTRIBUTORS b/vendor/golang.org/x/sync/CONTRIBUTORS
new file mode 100644
index 00000000..1c4577e9
--- /dev/null
+++ b/vendor/golang.org/x/sync/CONTRIBUTORS
@@ -0,0 +1,3 @@
+# This source code was written by the Go contributors.
+# The master list of contributors is in the main Go distribution,
+# visible at http://tip.golang.org/CONTRIBUTORS.
diff --git a/vendor/golang.org/x/sync/LICENSE b/vendor/golang.org/x/sync/LICENSE
new file mode 100644
index 00000000..6a66aea5
--- /dev/null
+++ b/vendor/golang.org/x/sync/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/sync/PATENTS b/vendor/golang.org/x/sync/PATENTS
new file mode 100644
index 00000000..73309904
--- /dev/null
+++ b/vendor/golang.org/x/sync/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go. This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation. If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/sync/errgroup/errgroup.go b/vendor/golang.org/x/sync/errgroup/errgroup.go
new file mode 100644
index 00000000..9857fe53
--- /dev/null
+++ b/vendor/golang.org/x/sync/errgroup/errgroup.go
@@ -0,0 +1,66 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package errgroup provides synchronization, error propagation, and Context
+// cancelation for groups of goroutines working on subtasks of a common task.
+package errgroup
+
+import (
+ "context"
+ "sync"
+)
+
+// A Group is a collection of goroutines working on subtasks that are part of
+// the same overall task.
+//
+// A zero Group is valid and does not cancel on error.
+type Group struct {
+ cancel func()
+
+ wg sync.WaitGroup
+
+ errOnce sync.Once
+ err error
+}
+
+// WithContext returns a new Group and an associated Context derived from ctx.
+//
+// The derived Context is canceled the first time a function passed to Go
+// returns a non-nil error or the first time Wait returns, whichever occurs
+// first.
+func WithContext(ctx context.Context) (*Group, context.Context) {
+ ctx, cancel := context.WithCancel(ctx)
+ return &Group{cancel: cancel}, ctx
+}
+
+// Wait blocks until all function calls from the Go method have returned, then
+// returns the first non-nil error (if any) from them.
+func (g *Group) Wait() error {
+ g.wg.Wait()
+ if g.cancel != nil {
+ g.cancel()
+ }
+ return g.err
+}
+
+// Go calls the given function in a new goroutine.
+//
+// The first call to return a non-nil error cancels the group; its error will be
+// returned by Wait.
+func (g *Group) Go(f func() error) {
+ g.wg.Add(1)
+
+ go func() {
+ defer g.wg.Done()
+
+ if err := f(); err != nil {
+ g.errOnce.Do(func() {
+ g.err = err
+ if g.cancel != nil {
+ g.cancel()
+ }
+ })
+ }
+ }()
+}
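
errgroup is added to the vendor tree along with the other new dependencies; a minimal sketch of the pattern it implements, matching the package doc above: run subtasks concurrently, cancel the shared context on the first error, and collect that error from Wait. The URLs are placeholders.

package main

import (
	"context"
	"fmt"
	"net/http"

	"golang.org/x/sync/errgroup"
)

func main() {
	g, ctx := errgroup.WithContext(context.Background())

	urls := []string{"https://example.com/", "https://example.org/"}
	for _, url := range urls {
		url := url // capture the loop variable for the goroutine below
		g.Go(func() error {
			req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
			if err != nil {
				return err
			}
			resp, err := http.DefaultClient.Do(req)
			if err != nil {
				return err // the first error cancels ctx, aborting the remaining fetches
			}
			return resp.Body.Close()
		})
	}

	if err := g.Wait(); err != nil {
		fmt.Println("fetch failed:", err)
	}
}
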
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 5407952c..cfc8d752 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -8,6 +8,12 @@ github.com/Shopify/sarama
github.com/alecthomas/gometalinter
# github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
github.com/alecthomas/units
+# github.com/avast/retry-go v2.4.1+incompatible
+github.com/avast/retry-go
+# github.com/bsm/sarama-cluster v2.1.15+incompatible
+github.com/bsm/sarama-cluster
+# github.com/burdiyan/kafkautil v0.0.0-20190131162249-eaf83ed22d5b
+github.com/burdiyan/kafkautil
# github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
github.com/chzyer/readline
# github.com/client9/misspell v0.3.4
@@ -63,6 +69,12 @@ github.com/juju/ansiterm
github.com/juju/ansiterm/tabwriter
# github.com/linkedin/goavro v2.1.0+incompatible
github.com/linkedin/goavro
+# github.com/lovoo/goka v0.1.4
+github.com/lovoo/goka
+github.com/lovoo/goka/kafka
+github.com/lovoo/goka/logger
+github.com/lovoo/goka/multierr
+github.com/lovoo/goka/storage
# github.com/lunixbochs/vtclean v0.0.0-20180621232353-2d01aacdc34a
github.com/lunixbochs/vtclean
# github.com/magiconair/properties v1.8.1
@@ -87,14 +99,33 @@ github.com/pelletier/go-toml
# github.com/pierrec/lz4 v2.0.5+incompatible
github.com/pierrec/lz4
github.com/pierrec/lz4/internal/xxh32
+# github.com/pkg/errors v0.9.1
+github.com/pkg/errors
# github.com/rcrowley/go-metrics v0.0.0-20190706150252-9beb055b7962
github.com/rcrowley/go-metrics
+# github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da
+github.com/samuel/go-zookeeper/zk
# github.com/spf13/cobra v0.0.5
github.com/spf13/cobra
# github.com/spf13/pflag v1.0.3
github.com/spf13/pflag
+# github.com/syndtr/goleveldb v1.0.0
+github.com/syndtr/goleveldb/leveldb
+github.com/syndtr/goleveldb/leveldb/cache
+github.com/syndtr/goleveldb/leveldb/comparer
+github.com/syndtr/goleveldb/leveldb/errors
+github.com/syndtr/goleveldb/leveldb/filter
+github.com/syndtr/goleveldb/leveldb/iterator
+github.com/syndtr/goleveldb/leveldb/journal
+github.com/syndtr/goleveldb/leveldb/memdb
+github.com/syndtr/goleveldb/leveldb/opt
+github.com/syndtr/goleveldb/leveldb/storage
+github.com/syndtr/goleveldb/leveldb/table
+github.com/syndtr/goleveldb/leveldb/util
# github.com/tsenart/deadcode v0.0.0-20160724212837-210d2dc333e9
github.com/tsenart/deadcode
+# github.com/wvanbergen/kazoo-go v0.0.0-20180202103751-f72d8611297a
+github.com/wvanbergen/kazoo-go
# github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c
github.com/xdg/scram
# github.com/xdg/stringprep v1.0.0
@@ -107,6 +138,8 @@ golang.org/x/lint
# golang.org/x/net v0.0.0-20190628185345-da137c7871d7
golang.org/x/net/internal/socks
golang.org/x/net/proxy
+# golang.org/x/sync v0.0.0-20190423024810-112230192c58
+golang.org/x/sync/errgroup
# golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7
golang.org/x/sys/unix
# golang.org/x/text v0.3.0