Skip to content

Commit 3619202

Browse files
anish749regadas
authored andcommitted
Add jmh benchmarks for bloom filters (#1913)
1 parent b60b312 commit 3619202

File tree

3 files changed

+146
-0
lines changed

3 files changed

+146
-0
lines changed

scio-jmh/README.md

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# JMH benchmarks
2+
3+
JMH-based benchmarks for specific components of Scio.
4+
5+
Getting started and running JMH benchmarks via sbt-shell:
6+
```
7+
$ sbt
8+
...
9+
sbt:scio> project scio-jmh
10+
sbt:scio-jmh> jmh:run -f1 -wi 2 -i 3 .*BloomFilter.*Benchmark.*
11+
```
12+
13+
The options for `jmh:run`:
14+
- `-f1` Run with 1 fork
15+
- `-wi 2` Run 2 warm-up iterations
16+
- `-i 3` Run 3 measurement iterations
17+
- `.*BloomFilter.*Benchmark.*` Regular expression selecting which benchmarks to run
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright 2019 Spotify AB.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing,
11+
* software distributed under the License is distributed on an
12+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13+
* KIND, either express or implied. See the License for the
14+
* specific language governing permissions and limitations
15+
* under the License.
16+
*/
17+
18+
package com.spotify.scio.jmh
19+
20+
import com.spotify.scio.util.{BloomFilter, BloomFilterAggregator, MutableBF}
21+
import org.openjdk.jmh.annotations._
22+
23+
import scala.util.Random
24+
25+
/**
 * Benchmarks for com.spotify.scio.util.BloomFilter
 *
 * Shared helpers and JMH state for the "create a BF from a collection" benchmark.
 */
object BloomFilterCreateBenchmark {

  /**
   * Generates the input data for a benchmark trial.
   *
   * @param nbrOfStrings    how many strings to produce
   * @param lengthOfStrings length requested from `Random.nextString` for each string
   * @return `nbrOfStrings` strings drawn from the global `scala.util.Random`
   */
  def createRandomString(nbrOfStrings: Int, lengthOfStrings: Int): Seq[String] =
    Seq.fill(nbrOfStrings) {
      Random.nextString(lengthOfStrings)
    }

  /** Benchmark-scoped JMH state: the `@Param` fields plus the strings generated from them. */
  @State(Scope.Benchmark)
  class BloomFilterState {

    // Number of random strings to generate; also the first argument given to BloomFilter.
    @Param(Array("100", "1000", "10000"))
    var nbrOfElements: Int = 0

    // Second argument given to BloomFilter by the benchmark method.
    @Param(Array("0.01", "0.001"))
    var falsePositiveRate: Double = 0.0

    // Populated by setup(); read by the benchmark method.
    var randomStrings: Seq[String] = _

    /** Regenerates the input strings (each of length 10) at `Level.Trial`. */
    @Setup(Level.Trial)
    def setup(): Unit = {
      randomStrings = createRandomString(nbrOfElements, 10)
    }
  }
}
50+
51+
/** Measures building a Scio `MutableBF` from the strings prepared in `BloomFilterState`. */
@State(Scope.Benchmark)
class BloomFilterCreateBenchmark {

  import BloomFilterCreateBenchmark._

  /**
   * Create a bloom filter by aggregating on a monoid.
   * This is the most efficient way to create the bloom filter.
   *
   * @param bloomFilterState JMH-injected state supplying the element count,
   *                         false positive rate and pre-generated strings
   * @return the filter after every string has been added; returning it lets
   *         JMH consume the result
   */
  @Benchmark
  def scioMutableBF(bloomFilterState: BloomFilterState): MutableBF[String] = {
    val bfMonoid =
      BloomFilter[String](bloomFilterState.nbrOfElements, bloomFilterState.falsePositiveRate)
    val bfAggregator = BloomFilterAggregator(bfMonoid)

    // `randomStrings` is a sequential Seq, so `aggregate` only ever applied its
    // per-element operator and never the combine step. `foldLeft` expresses
    // exactly that and avoids `aggregate`, which Scala 2.13 deprecates for
    // sequential collections.
    bloomFilterState.randomStrings.foldLeft(bfAggregator.monoid.zero)(_ += _)
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright 2019 Spotify AB.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing,
11+
* software distributed under the License is distributed on an
12+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13+
* KIND, either express or implied. See the License for the
14+
* specific language governing permissions and limitations
15+
* under the License.
16+
*/
17+
18+
package com.spotify.scio.jmh
19+
20+
import com.spotify.scio.util.{BloomFilter, MutableBF}
21+
import com.twitter.algebird.ApproximateBoolean
22+
import org.openjdk.jmh.annotations._
23+
24+
/**
 * Benchmarks for com.spotify.scio.util.BloomFilter
 *
 * JMH state for the "query elements from a BloomFilter" benchmark.
 */
object BloomFilterQueryBenchmark {

  /** Benchmark-scoped JMH state holding a filter that is populated before measurement. */
  @State(Scope.Benchmark)
  class BloomFilterState {

    // Number of random strings inserted; also the first argument given to BloomFilter.
    @Param(Array("100", "1000", "10000"))
    var nbrOfElements: Int = 0

    // Second argument given to BloomFilter.
    @Param(Array("0.001", "0.01"))
    var falsePositiveRate: Double = 0.0

    // The filter under test; assigned in setup().
    private[scio] var bf: MutableBF[String] = _

    /** Builds the filter from freshly generated strings (each of length 10) at `Level.Trial`. */
    @Setup(Level.Trial)
    def setup(): Unit = {
      val inputs = BloomFilterCreateBenchmark.createRandomString(nbrOfElements, 10)
      val bfMonoid = BloomFilter[String](nbrOfElements, falsePositiveRate)
      bf = bfMonoid.create(inputs: _*)
    }
  }
}
51+
52+
/** Measures a single membership query against the filter held in `BloomFilterState`. */
@State(Scope.Benchmark)
class BloomFilterQueryBenchmark {

  import BloomFilterQueryBenchmark._

  /**
   * Queries the prepared filter for the fixed key "1".
   *
   * @param bloomFilterState JMH-injected state carrying the pre-built filter
   * @return the result of `contains`, returned so JMH consumes it
   */
  @Benchmark
  def scioBloomFilter(bloomFilterState: BloomFilterState): ApproximateBoolean = {
    val filter = bloomFilterState.bf
    filter.contains("1")
  }
}

0 commit comments

Comments
 (0)