Skip to content

Commit 339e200

Browse files
committed
Merge pull request #245 from twitter/Kryo3Upgrade
Kryo 3.0.3 upgrade
2 parents c37b59b + 1ec4dc3 commit 339e200

File tree

9 files changed

+213
-31
lines changed

9 files changed

+213
-31
lines changed

build.sbt

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
55
import scala.collection.JavaConverters._
66
import scalariform.formatter.preferences._
77

8-
val kryoVersion = "2.21"
8+
val kryoVersion = "3.0.3"
99
val bijectionVersion = "0.9.0"
1010
val algebirdVersion = "0.12.0"
1111

@@ -32,7 +32,7 @@ val sharedSettings = Project.defaultSettings ++ mimaDefaultSettings ++ scalarifo
3232
libraryDependencies ++= Seq(
3333
"org.scalacheck" %% "scalacheck" % "1.11.5" % "test",
3434
"org.scalatest" %% "scalatest" % "2.2.2" % "test",
35-
"com.esotericsoftware.kryo" % "kryo" % kryoVersion
35+
"com.esotericsoftware" % "kryo-shaded" % kryoVersion
3636
),
3737

3838
parallelExecution in Test := true,

chill-hadoop/src/main/java/com/twitter/chill/hadoop/KryoDeserializer.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public void open(InputStream in) throws IOException {
4646

4747
public Object deserialize(Object o) throws IOException {
4848
// TODO, we could share these buffers if we see that alloc is bottlenecking
49-
byte[] bytes = new byte[inputStream.readInt()];
49+
byte[] bytes = new byte[Varint.readUnsignedVarInt(inputStream)];
5050
inputStream.readFully( bytes );
5151
return kryoPool.fromBytes(bytes, klass);
5252
}

chill-hadoop/src/main/java/com/twitter/chill/hadoop/KryoSerializer.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public void serialize(Object o) throws IOException {
4747
try {
4848
st.writeObject(o);
4949
// Copy from buffer to output stream.
50-
outputStream.writeInt(st.numOfWrittenBytes());
50+
Varint.writeUnsignedVarInt(st.numOfWrittenBytes(), outputStream);
5151
st.writeOutputTo(outputStream);
5252
}
5353
finally {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
/**
19+
* Taken from org.apache.mahout.math
20+
* https://github.com/apache/mahout
21+
*/
22+
23+
package com.twitter.chill.hadoop;
24+
25+
import java.io.DataInputStream;
26+
import java.io.DataOutputStream;
27+
import java.io.IOException;
28+
29+
/**
30+
* <p>Encodes signed and unsigned values using a common variable-length
31+
* scheme, found for example in
32+
* <a href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
33+
* Google's Protocol Buffers</a>. It uses fewer bytes to encode smaller values,
34+
* but will use slightly more bytes to encode large values.</p>
35+
*
36+
* <p>Signed values are further encoded using so-called zig-zag encoding
37+
* in order to make them "compatible" with variable-length encoding.</p>
38+
*/
39+
final class Varint {
40+
41+
private Varint() {
42+
}
43+
44+
/**
45+
* Encodes a value using the variable-length encoding from
46+
* <a href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
47+
* Google Protocol Buffers</a>. It uses zig-zag encoding to efficiently
48+
* encode signed values. If values are known to be nonnegative,
49+
* {@link #writeUnsignedVarLong(long, java.io.DataOutputStream)} should be used.
50+
*
51+
* @param value value to encode
52+
* @param out to write bytes to
53+
* @throws java.io.IOException if {@link java.io.DataOutput} throws {@link java.io.IOException}
54+
*/
55+
public static void writeSignedVarLong(long value, DataOutputStream out) throws IOException {
56+
// Great trick from http://code.google.com/apis/protocolbuffers/docs/encoding.html#types
57+
writeUnsignedVarLong((value << 1) ^ (value >> 63), out);
58+
}
59+
60+
/**
61+
* Encodes a value using the variable-length encoding from
62+
* <a href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
63+
* Google Protocol Buffers</a>. Zig-zag is not used, so input must not be negative.
64+
* If values can be negative, use {@link #writeSignedVarLong(long, java.io.DataOutputStream)}
65+
* instead. This method treats negative input as like a large unsigned value.
66+
*
67+
* @param value value to encode
68+
* @param out to write bytes to
69+
* @throws java.io.IOException if {@link java.io.DataOutputStream} throws {@link java.io.IOException}
70+
*/
71+
public static void writeUnsignedVarLong(long value, DataOutputStream out) throws IOException {
72+
while ((value & 0xFFFFFFFFFFFFFF80L) != 0L) {
73+
out.writeByte(((int) value & 0x7F) | 0x80);
74+
value >>>= 7;
75+
}
76+
out.writeByte((int) value & 0x7F);
77+
}
78+
79+
/**
80+
* @see #writeSignedVarLong(long, java.io.DataOutputStream)
81+
*/
82+
public static void writeSignedVarInt(int value, DataOutputStream out) throws IOException {
83+
// Great trick from http://code.google.com/apis/protocolbuffers/docs/encoding.html#types
84+
writeUnsignedVarInt((value << 1) ^ (value >> 31), out);
85+
}
86+
87+
/**
88+
* @see #writeUnsignedVarLong(long, java.io.DataOutputStream)
89+
*/
90+
public static void writeUnsignedVarInt(int value, DataOutputStream out) throws IOException {
91+
while ((value & 0xFFFFFF80) != 0L) {
92+
out.writeByte((value & 0x7F) | 0x80);
93+
value >>>= 7;
94+
}
95+
out.writeByte(value & 0x7F);
96+
}
97+
98+
/**
99+
* @param in to read bytes from
100+
* @return decode value
101+
* @throws java.io.IOException if {@link java.io.DataInput} throws {@link java.io.IOException}
102+
* @throws IllegalArgumentException if variable-length value does not terminate
103+
* after 9 bytes have been read
104+
* @see #writeSignedVarLong(long, java.io.DataOutputStream)
105+
*/
106+
public static long readSignedVarLong(DataInputStream in) throws IOException {
107+
long raw = readUnsignedVarLong(in);
108+
// This undoes the trick in writeSignedVarLong()
109+
long temp = (((raw << 63) >> 63) ^ raw) >> 1;
110+
// This extra step lets us deal with the largest signed values by treating
111+
// negative results from read unsigned methods as like unsigned values
112+
// Must re-flip the top bit if the original read value had it set.
113+
return temp ^ (raw & (1L << 63));
114+
}
115+
116+
/**
117+
* @param in to read bytes from
118+
* @return decode value
119+
* @throws java.io.IOException if {@link java.io.DataInput} throws {@link java.io.IOException}
120+
* @throws IllegalArgumentException if variable-length value does not terminate
121+
* after 9 bytes have been read
122+
* @see #writeUnsignedVarLong(long, java.io.DataOutputStream)
123+
*/
124+
public static long readUnsignedVarLong(DataInputStream in) throws IOException {
125+
long value = 0L;
126+
int i = 0;
127+
long b;
128+
while (((b = in.readByte()) & 0x80L) != 0) {
129+
value |= (b & 0x7F) << i;
130+
i += 7;
131+
}
132+
return value | (b << i);
133+
}
134+
135+
/**
136+
* @throws IllegalArgumentException if variable-length value does not terminate
137+
* after 5 bytes have been read
138+
* @throws java.io.IOException if {@link java.io.DataInput} throws {@link java.io.IOException}
139+
* @see #readSignedVarLong(java.io.DataInputStream)
140+
*/
141+
public static int readSignedVarInt(DataInputStream in) throws IOException {
142+
int raw = readUnsignedVarInt(in);
143+
// This undoes the trick in writeSignedVarInt()
144+
int temp = (((raw << 31) >> 31) ^ raw) >> 1;
145+
// This extra step lets us deal with the largest signed values by treating
146+
// negative results from read unsigned methods as like unsigned values.
147+
// Must re-flip the top bit if the original read value had it set.
148+
return temp ^ (raw & (1 << 31));
149+
}
150+
151+
/**
152+
* @throws IllegalArgumentException if variable-length value does not terminate
153+
* after 5 bytes have been read
154+
* @throws java.io.IOException if {@link java.io.DataInput} throws {@link java.io.IOException}
155+
* @see #readUnsignedVarLong(java.io.DataInputStream)
156+
*/
157+
public static int readUnsignedVarInt(DataInputStream in) throws IOException {
158+
int value = 0;
159+
int i = 0;
160+
int b;
161+
while (((b = in.readByte()) & 0x80) != 0 && i < 42) {
162+
value |= (b & 0x7F) << i;
163+
i += 7;
164+
}
165+
if(i == 42) { // Over read!
166+
throw new IllegalArgumentException("Read more than 5 bytes of data, must be invalid Var int");
167+
}
168+
return value | (b << i);
169+
}
170+
171+
}

chill-java/src/main/java/com/twitter/chill/ReflectingDefaultRegistrar.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public ReflectingDefaultRegistrar(Class<T> cls, Class<? extends Serializer<?>> s
3232
public Class<T> getRegisteredClass() { return klass; }
3333
public Class<? extends Serializer<?>> getSerializerClass() { return serializerKlass; }
3434
@Override
35-
public void apply(Kryo k) { k.addDefaultSerializer(klass, k.newSerializer(serializerKlass, klass)); }
35+
public void apply(Kryo k) { k.addDefaultSerializer(klass, serializerKlass); }
3636

3737
@Override
3838
public int hashCode() { return klass.hashCode() ^ serializerKlass.hashCode(); }

chill-java/src/main/java/com/twitter/chill/ReflectingRegistrar.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import com.esotericsoftware.kryo.Kryo;
2020
import com.esotericsoftware.kryo.Serializer;
21+
import com.esotericsoftware.kryo.util.Util;
2122

2223
/** Use reflection to instantiate a serializer.
2324
* Used when serializer classes are written to config files
@@ -34,8 +35,16 @@ public ReflectingRegistrar(Class<T> cls, Class<? extends Serializer<?>> ser) {
3435
klass = cls;
3536
serializerKlass = ser;
3637
}
38+
3739
@Override
38-
public void apply(Kryo k) { k.register(klass, k.newSerializer(serializerKlass, klass)); }
40+
public void apply(Kryo k) {
41+
try {
42+
k.register(klass, serializerKlass.newInstance());
43+
} catch (Exception ex) {
44+
throw new IllegalArgumentException("Unable to create serializer \"" + serializerKlass.getName() + "\" for class: "
45+
+ Util.className(klass), ex);
46+
}
47+
}
3948
@Override
4049
public int hashCode() { return klass.hashCode() ^ serializerKlass.hashCode(); }
4150

chill-java/src/main/java/com/twitter/chill/SerDeState.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ public void clear() {
5050
public void setInput(byte[] in, int offset, int count) { input.setBuffer(in, offset, count); }
5151
public void setInput(InputStream in) { input.setInputStream(in); }
5252

53-
public int numOfWrittenBytes() { return output.total(); }
54-
public int numOfReadBytes() { return input.total(); }
53+
public int numOfWrittenBytes() { return (int)output.total(); }
54+
public int numOfReadBytes() { return (int)input.total(); }
5555

5656
// Common operations:
5757
public <T> T readObject(Class<T> cls) {

chill-scala/src/main/scala/com/twitter/chill/KryoBase.scala

+24-22
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ import org.objenesis.strategy.InstantiatorStrategy
2525

2626
import _root_.java.lang.reflect.{ Constructor, Modifier }
2727

28+
import scala.util.{ Try, Success, Failure }
29+
2830
/*
2931
* This is the base class of Kryo we use to fix specific scala
3032
* related issues discovered (ideally, this should be fixed in Kryo)
@@ -90,37 +92,37 @@ class KryoBase extends Kryo {
9092
/* Fixes the case where Kryo's reflectasm doesn't work, even though it claims to
9193
* TODO this should be fixed in Kryo. When it is, remove this
9294
*/
93-
override def newInstantiator(cls: Class[_]) = {
95+
override def newInstantiator(cls: Class[_]) = newTypedInstantiator[AnyRef](cls.asInstanceOf[Class[AnyRef]])
96+
97+
private[this] def newTypedInstantiator[T](cls: Class[T]) = {
9498
import Instantiators._
9599
newOrElse(cls,
96-
List(reflectAsm _, normalJava _),
100+
List(reflectAsm[T](_), normalJava[T](_)),
97101
// Or fall back on the strategy:
98102
tryStrategy(cls).newInstantiatorOf(cls))
99103
}
100104
}
101105

102106
object Instantiators {
103107
// Go through the list and use the first that works
104-
def newOrElse(cls: Class[_],
105-
it: TraversableOnce[Class[_] => Either[Throwable, ObjectInstantiator]],
106-
elsefn: => ObjectInstantiator): ObjectInstantiator = {
108+
def newOrElse[T](cls: Class[T],
109+
it: TraversableOnce[Class[T] => Try[ObjectInstantiator[T]]],
110+
elsefn: => ObjectInstantiator[T]): ObjectInstantiator[T] = {
107111
// Just go through and try each one,
108-
it.map { fn =>
109-
fn(cls) match {
110-
case Left(x) => None // ignore the exception
111-
case Right(obji) => Some(obji)
112+
113+
it
114+
.flatMap { fn =>
115+
fn(cls).toOption
112116
}
113-
}
114-
.find { _.isDefined } // Find the first Some(x), returns Some(Some(x))
115-
.flatMap { x => x } // flatten
117+
.find (_ => true) // first element in traversable once (no headOption defined.)
116118
.getOrElse(elsefn)
117119
}
118120

119121
// Use call by name:
120-
def forClass(t: Class[_])(fn: () => Any): ObjectInstantiator =
121-
new ObjectInstantiator {
122+
def forClass[T](t: Class[T])(fn: () => T): ObjectInstantiator[T] =
123+
new ObjectInstantiator[T] {
122124
override def newInstance() = {
123-
try { fn().asInstanceOf[AnyRef] }
125+
try { fn() }
124126
catch {
125127
case x: Exception => {
126128
throw new KryoException("Error constructing instance of class: " + t.getName, x)
@@ -130,19 +132,19 @@ object Instantiators {
130132
}
131133

132134
// This one tries reflectasm, which is a fast way of constructing an object
133-
def reflectAsm(t: Class[_]): Either[Throwable, ObjectInstantiator] = {
135+
def reflectAsm[T](t: Class[T]): Try[ObjectInstantiator[T]] = {
134136
try {
135137
val access = ConstructorAccess.get(t)
136138
// Try it once, because this isn't always successful:
137139
access.newInstance
138140
// Okay, looks good:
139-
Right(forClass(t) { () => access.newInstance() })
141+
Success(forClass(t) { () => access.newInstance() })
140142
} catch {
141-
case x: Throwable => Left(x)
143+
case x: Throwable => Failure(x)
142144
}
143145
}
144146

145-
def getConstructor(c: Class[_]): Constructor[_] = {
147+
def getConstructor[T](c: Class[T]): Constructor[T] = {
146148
try {
147149
c.getConstructor()
148150
} catch {
@@ -154,12 +156,12 @@ object Instantiators {
154156
}
155157
}
156158

157-
def normalJava(t: Class[_]): Either[Throwable, ObjectInstantiator] = {
159+
def normalJava[T](t: Class[T]): Try[ObjectInstantiator[T]] = {
158160
try {
159161
val cons = getConstructor(t)
160-
Right(forClass(t) { () => cons.newInstance() })
162+
Success(forClass(t) { () => cons.newInstance() })
161163
} catch {
162-
case x: Throwable => Left(x)
164+
case x: Throwable => Failure(x)
163165
}
164166
}
165167
}

version.sbt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version in ThisBuild := "0.7.5-SNAPSHOT"
1+
version in ThisBuild := "0.8.0-SNAPSHOT"

0 commit comments

Comments
 (0)