1
+ /**
2
+ * Licensed to the Apache Software Foundation (ASF) under one or more
3
+ * contributor license agreements. See the NOTICE file distributed with
4
+ * this work for additional information regarding copyright ownership.
5
+ * The ASF licenses this file to You under the Apache License, Version 2.0
6
+ * (the "License"); you may not use this file except in compliance with
7
+ * the License. You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ /**
19
+ * Taken from org.apache.mahout.math
20
+ * https://github.com/apache/mahout
21
+ */
22
+
23
+ package com .twitter .chill .hadoop ;
24
+
25
+ import java .io .DataInputStream ;
26
+ import java .io .DataOutputStream ;
27
+ import java .io .IOException ;
28
+
29
/**
 * <p>Encodes signed and unsigned values using a common variable-length
 * scheme, found for example in
 * <a href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
 * Google's Protocol Buffers</a>. Each output byte carries 7 payload bits
 * (least-significant group first); the high bit of a byte is set when more
 * bytes follow. Small values therefore use fewer bytes, while large values
 * use slightly more than their fixed-width form.</p>
 *
 * <p>Signed values are first zig-zag encoded (0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ...) so that values of small magnitude stay short.</p>
 */
final class Varint {

  /** Payload bits stored in each encoded byte. */
  private static final int BITS_PER_BYTE = 7;

  /** Shift applied to the 5th (last permitted) byte of an encoded int. */
  private static final int MAX_INT_SHIFT = 28;

  /** Shift applied to the 10th (last permitted) byte of an encoded long. */
  private static final int MAX_LONG_SHIFT = 63;

  private Varint() {
    // Static utility class; not instantiable.
  }

  /**
   * Writes a signed long using zig-zag plus variable-length encoding.
   * If values are known to be nonnegative,
   * {@link #writeUnsignedVarLong(long, java.io.DataOutputStream)} produces
   * shorter output.
   *
   * @param value value to encode
   * @param out stream to write bytes to
   * @throws java.io.IOException if writing to {@code out} fails
   */
  public static void writeSignedVarLong(long value, DataOutputStream out) throws IOException {
    // Zig-zag: the sign bit moves to bit 0, the magnitude to the upper bits.
    long zigZag = (value << 1) ^ (value >> 63);
    writeUnsignedVarLong(zigZag, out);
  }

  /**
   * Writes a long using variable-length encoding without zig-zag.
   * Negative input is treated as a large unsigned 64-bit value and takes
   * 10 bytes; use {@link #writeSignedVarLong(long, java.io.DataOutputStream)}
   * when values can be negative.
   *
   * @param value value to encode
   * @param out stream to write bytes to
   * @throws java.io.IOException if writing to {@code out} fails
   */
  public static void writeUnsignedVarLong(long value, DataOutputStream out) throws IOException {
    // Emit 7 bits at a time while anything remains above the low 7 bits.
    while ((value & ~0x7FL) != 0L) {
      out.writeByte(((int) value & 0x7F) | 0x80);
      value >>>= BITS_PER_BYTE;
    }
    out.writeByte((int) value & 0x7F);
  }

  /**
   * Writes a signed int using zig-zag plus variable-length encoding.
   *
   * @param value value to encode
   * @param out stream to write bytes to
   * @throws java.io.IOException if writing to {@code out} fails
   * @see #writeSignedVarLong(long, java.io.DataOutputStream)
   */
  public static void writeSignedVarInt(int value, DataOutputStream out) throws IOException {
    int zigZag = (value << 1) ^ (value >> 31);
    writeUnsignedVarInt(zigZag, out);
  }

  /**
   * Writes an int using variable-length encoding without zig-zag.
   * Negative input is treated as a large unsigned 32-bit value and takes
   * 5 bytes.
   *
   * @param value value to encode
   * @param out stream to write bytes to
   * @throws java.io.IOException if writing to {@code out} fails
   * @see #writeUnsignedVarLong(long, java.io.DataOutputStream)
   */
  public static void writeUnsignedVarInt(int value, DataOutputStream out) throws IOException {
    while ((value & ~0x7F) != 0) {
      out.writeByte((value & 0x7F) | 0x80);
      value >>>= BITS_PER_BYTE;
    }
    out.writeByte(value & 0x7F);
  }

  /**
   * Reads a long written by
   * {@link #writeSignedVarLong(long, java.io.DataOutputStream)}.
   *
   * @param in stream to read bytes from
   * @return decoded value
   * @throws java.io.IOException if reading from {@code in} fails
   * @throws IllegalArgumentException if the encoded value does not terminate
   *         within 10 bytes
   */
  public static long readSignedVarLong(DataInputStream in) throws IOException {
    long raw = readUnsignedVarLong(in);
    // Undo zig-zag: the unsigned shift recovers the magnitude bits, and
    // -(raw & 1) is all ones exactly when the original value was negative.
    return (raw >>> 1) ^ -(raw & 1L);
  }

  /**
   * Reads a long written by
   * {@link #writeUnsignedVarLong(long, java.io.DataOutputStream)}.
   *
   * @param in stream to read bytes from
   * @return decoded value; values with bit 63 set come back negative
   * @throws java.io.IOException if reading from {@code in} fails
   * @throws IllegalArgumentException if the encoded value does not terminate
   *         within 10 bytes
   */
  public static long readUnsignedVarLong(DataInputStream in) throws IOException {
    long value = 0L;
    int shift = 0;
    long b;
    while (((b = in.readByte()) & 0x80L) != 0) {
      // A continuation bit on the 10th byte means the value cannot fit in
      // 64 bits. Java masks long shift distances to 6 bits, so carrying on
      // would silently fold extra bytes into the low bits of the result.
      if (shift >= MAX_LONG_SHIFT) {
        throw new IllegalArgumentException(
            "Read more than 10 bytes of data, must be invalid Var long");
      }
      value |= (b & 0x7F) << shift;
      shift += BITS_PER_BYTE;
    }
    // The terminating byte has its high bit clear, so b is in 0..127 here.
    return value | (b << shift);
  }

  /**
   * Reads an int written by
   * {@link #writeSignedVarInt(int, java.io.DataOutputStream)}.
   *
   * @param in stream to read bytes from
   * @return decoded value
   * @throws IllegalArgumentException if the encoded value does not terminate
   *         within 5 bytes
   * @throws java.io.IOException if reading from {@code in} fails
   * @see #readSignedVarLong(java.io.DataInputStream)
   */
  public static int readSignedVarInt(DataInputStream in) throws IOException {
    int raw = readUnsignedVarInt(in);
    // Same zig-zag inversion as readSignedVarLong, on 32 bits.
    return (raw >>> 1) ^ -(raw & 1);
  }

  /**
   * Reads an int written by
   * {@link #writeUnsignedVarInt(int, java.io.DataOutputStream)}.
   *
   * @param in stream to read bytes from
   * @return decoded value; values with bit 31 set come back negative
   * @throws IllegalArgumentException if the encoded value does not terminate
   *         within 5 bytes
   * @throws java.io.IOException if reading from {@code in} fails
   * @see #readUnsignedVarLong(java.io.DataInputStream)
   */
  public static int readUnsignedVarInt(DataInputStream in) throws IOException {
    int value = 0;
    int shift = 0;
    int b;
    while (((b = in.readByte()) & 0x80) != 0) {
      // A continuation bit on the 5th byte means the value cannot fit in
      // 32 bits. Java masks int shift distances to 5 bits, so a 6th byte
      // would be shifted by 35 & 31 == 3 and corrupt the result.
      if (shift >= MAX_INT_SHIFT) {
        throw new IllegalArgumentException(
            "Read more than 5 bytes of data, must be invalid Var int");
      }
      value |= (b & 0x7F) << shift;
      shift += BITS_PER_BYTE;
    }
    // The terminating byte has its high bit clear, so b is in 0..127 here.
    return value | (b << shift);
  }

}
0 commit comments