Skip to content

Commit 3338d0d

Browse files
committed
PDFBOX-5902: provide singleton instances for heavily used Integer and byte[] values to avoid multiple instances
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1922578 13f79535-47bb-0310-9956-ffa450edef68
1 parent 3fdb0cf commit 3338d0d

File tree

2 files changed

+47
-24
lines changed

2 files changed

+47
-24
lines changed

fontbox/src/main/java/org/apache/fontbox/cmap/CMap.java

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -332,25 +332,6 @@ private int toCIDFromRanges(byte[] code)
332332
return 0;
333333
}
334334

335-
/**
336-
* Convert the given part of a byte array to an integer.
337-
*
338-
* @param data the byte array
339-
* @param offset The offset into the byte array.
340-
* @param length The length of the data we are getting.
341-
* @return the resulting integer
342-
*/
343-
private int getCodeFromArray( byte[] data, int offset, int length )
344-
{
345-
int code = 0;
346-
for( int i=0; i<length; i++ )
347-
{
348-
code <<= 8;
349-
code |= (data[offset+i]+256)%256;
350-
}
351-
return code;
352-
}
353-
354335
/**
355336
* This will add a character code to Unicode character sequence mapping.
356337
*
@@ -359,15 +340,15 @@ private int getCodeFromArray( byte[] data, int offset, int length )
359340
*/
360341
void addCharMapping(byte[] codes, String unicode)
361342
{
362-
unicodeToByteCodes.put(unicode, codes.clone()); // clone needed, bytes is modified later
363-
int code = getCodeFromArray(codes, 0, codes.length);
364343
if (codes.length == 1)
365344
{
366-
charToUnicodeOneByte.put(code, unicode);
345+
charToUnicodeOneByte.put(CMapStrings.getIndexValue(codes), unicode);
346+
unicodeToByteCodes.put(unicode, CMapStrings.getByteValue(codes));
367347
}
368348
else if (codes.length == 2)
369349
{
370-
charToUnicodeTwoBytes.put(code, unicode);
350+
charToUnicodeTwoBytes.put(CMapStrings.getIndexValue(codes), unicode);
351+
unicodeToByteCodes.put(unicode, CMapStrings.getByteValue(codes));
371352
}
372353
else
373354
{
@@ -376,7 +357,7 @@ else if (codes.length == 2)
376357
// fixme: ugly little hack
377358
if (SPACE.equals(unicode))
378359
{
379-
spaceMapping = code;
360+
spaceMapping = toInt(codes);
380361
}
381362
}
382363

fontbox/src/main/java/org/apache/fontbox/cmap/CMapStrings.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ public class CMapStrings
3030
private static final List<String> twoByteMappings = new ArrayList<>(256 * 256);
3131
private static final List<String> oneByteMappings = new ArrayList<>(256);
3232

33+
private static final List<Integer> indexValues = new ArrayList<>(256 * 256);
34+
private static final List<byte[]> oneByteValues = new ArrayList<>(256);
35+
private static final List<byte[]> twoByteValues = new ArrayList<>(256 * 256);
36+
3337
static
3438
{
3539
// create all mappings when loading the class to avoid concurrency issues
@@ -48,12 +52,15 @@ private static void fillMappings()
4852
{
4953
byte[] bytes = { (byte) i, (byte) j };
5054
twoByteMappings.add(new String(bytes, StandardCharsets.UTF_16BE));
55+
twoByteValues.add(bytes);
56+
indexValues.add((i * 256) + j);
5157
}
5258
}
5359
for (int i = 0; i < 256; i++)
5460
{
5561
byte[] bytes = { (byte) i };
5662
oneByteMappings.add(new String(bytes, StandardCharsets.ISO_8859_1));
63+
oneByteValues.add(bytes);
5764
}
5865
}
5966

@@ -73,4 +80,39 @@ public static String getMapping(byte[] bytes)
7380
return bytes.length == 1 ? oneByteMappings.get(CMap.toInt(bytes))
7481
: twoByteMappings.get(CMap.toInt(bytes));
7582
}
83+
84+
/**
85+
* Get an Integer instance of the given combination of bytes. Each value is a singleton to avoid multiple instances
86+
* for the same value. The values are limited to one- and two-byte sequences. Any longer byte sequence produces null as
87+
* return value.
88+
*
89+
* @param bytes the given combination of bytes
90+
* @return the Integer representation for the given combination of bytes
91+
*/
92+
public static Integer getIndexValue(byte[] bytes)
93+
{
94+
if (bytes.length > 2)
95+
{
96+
return null;
97+
}
98+
return indexValues.get(CMap.toInt(bytes));
99+
}
100+
101+
/**
102+
* Get a singleton instance of the given combination of bytes to avoid multiple instances for the same value. The values
103+
* are limited to one- and two-byte sequences. Any longer byte sequence produces null as return value. The returned array is the shared cached instance, so callers must not modify it.
104+
*
105+
* @param bytes the given combination of bytes
106+
* @return a singleton instance for the given combination of bytes
107+
*/
108+
public static byte[] getByteValue(byte[] bytes)
109+
{
110+
if (bytes.length > 2)
111+
{
112+
return null;
113+
}
114+
return bytes.length == 1 ? oneByteValues.get(CMap.toInt(bytes))
115+
: twoByteValues.get(CMap.toInt(bytes));
116+
}
117+
76118
}

0 commit comments

Comments
 (0)