Skip to content

Commit 24431b1

Browse files
committed
Optimize internal representation of IndirectReference
1 parent 8f9194c commit 24431b1

File tree

3 files changed

+78
-9
lines changed

3 files changed

+78
-9
lines changed

src/UglyToad.PdfPig.Core/IndirectReference.cs

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,33 @@
33
using System;
44
using System.Diagnostics;
55

6+
// https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java#L25
7+
68
/// <summary>
79
/// Used to uniquely identify and refer to objects in the PDF file.
810
/// </summary>
911
public readonly struct IndirectReference : IEquatable<IndirectReference>
1012
{
13+
private const int NUMBER_OFFSET = sizeof(ushort) * 8;
14+
private static readonly long GENERATION_MASK = (long)Math.Pow(2, NUMBER_OFFSET) - 1;
15+
private static readonly long MAX_OBJECT_NUMBER = (long)(Math.Pow(2, sizeof(long) * 8 - NUMBER_OFFSET) - 1) / 2;
16+
17+
// combined number and generation
18+
// The lowest 16 bits hold the generation 0-65535
19+
// The remaining bits are used for the object number (even though 34 bits are sufficient for 10 digits)
20+
private readonly long numberAndGeneration;
21+
1122
/// <summary>
1223
/// A positive integer object number.
1324
/// </summary>
14-
public long ObjectNumber { get; }
25+
// Unlike PdfBox, the sign of the object number is preserved: we use >> (arithmetic right shift) instead of >>> (unsigned right shift).
26+
public long ObjectNumber => numberAndGeneration >> NUMBER_OFFSET;
1527

1628
/// <summary>
1729
/// A non-negative integer generation number which starts as 0 and increases if the file is updated incrementally.
30+
/// <para>The maximum generation number is 65,535.</para>
1831
/// </summary>
19-
public int Generation { get; }
32+
public int Generation => (int)(numberAndGeneration & GENERATION_MASK);
2033

2134
/// <summary>
2235
/// Create a new <see cref="IndirectReference"/>
@@ -26,14 +39,34 @@
2639
[DebuggerStepThrough]
2740
public IndirectReference(long objectNumber, int generation)
2841
{
29-
ObjectNumber = objectNumber;
30-
Generation = generation;
42+
if (generation < 0 || generation > ushort.MaxValue)
43+
{
44+
throw new ArgumentOutOfRangeException(nameof(generation), "Generation number must not be a negative value, and less or equal to 65,535.");
45+
}
46+
47+
if (objectNumber < -MAX_OBJECT_NUMBER || objectNumber > MAX_OBJECT_NUMBER)
48+
{
49+
throw new ArgumentOutOfRangeException(nameof(objectNumber), $"Object number must be between -{MAX_OBJECT_NUMBER:##,###} and {MAX_OBJECT_NUMBER:##,###}.");
50+
}
51+
52+
numberAndGeneration = ComputeInternalHash(objectNumber, generation);
53+
}
54+
55+
/// <summary>
56+
/// Calculate the internal hash value for the given object number and generation number.
57+
/// </summary>
58+
/// <param name="num">The object number.</param>
59+
/// <param name="gen">The generation number.</param>
60+
/// <returns>The internal hash for the given values.</returns>
61+
private static long ComputeInternalHash(long num, int gen)
62+
{
63+
return num << NUMBER_OFFSET | (gen & GENERATION_MASK);
3164
}
3265

3366
/// <inheritdoc />
3467
public bool Equals(IndirectReference other)
3568
{
36-
return other.ObjectNumber == ObjectNumber && other.Generation == Generation;
69+
return other.numberAndGeneration == numberAndGeneration;
3770
}
3871

3972
/// <inheritdoc />
@@ -45,7 +78,7 @@ public override bool Equals(object obj)
4578
/// <inheritdoc />
4679
public override int GetHashCode()
4780
{
48-
return HashCode.Combine(ObjectNumber, Generation);
81+
return numberAndGeneration.GetHashCode();
4982
}
5083

5184
/// <inheritdoc />

src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,44 @@ public void TwoIndirectReferenceEqual()
3030
Assert.True(reference1.Equals(reference2));
3131
}
3232

33+
[Fact]
34+
public void IndirectReferenceHashTest()
35+
{
36+
var reference0 = new IndirectReference(1574, 690);
37+
Assert.Equal(1574, reference0.ObjectNumber);
38+
Assert.Equal(690, reference0.Generation);
39+
40+
var reference1 = new IndirectReference(-1574, 690);
41+
Assert.Equal(-1574, reference1.ObjectNumber);
42+
Assert.Equal(690, reference1.Generation);
43+
44+
var reference2 = new IndirectReference(58949797283757, 16);
45+
Assert.Equal(58949797283757, reference2.ObjectNumber);
46+
Assert.Equal(16, reference2.Generation);
47+
48+
var reference3 = new IndirectReference(-58949797283757, ushort.MaxValue);
49+
Assert.Equal(-58949797283757, reference3.ObjectNumber);
50+
Assert.Equal(ushort.MaxValue, reference3.Generation);
51+
52+
var reference4 = new IndirectReference(140737488355327, ushort.MaxValue);
53+
Assert.Equal(140737488355327, reference4.ObjectNumber);
54+
Assert.Equal(ushort.MaxValue, reference4.Generation);
55+
56+
var reference5 = new IndirectReference(-140737488355327, ushort.MaxValue);
57+
Assert.Equal(-140737488355327, reference5.ObjectNumber);
58+
Assert.Equal(ushort.MaxValue, reference5.Generation);
59+
60+
var ex0 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(140737488355328, 0));
61+
Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex0.Message);
62+
var ex1 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(-140737488355328, 0));
63+
Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message);
64+
65+
var ex2 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(1574, -1));
66+
Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex2.Message);
67+
var ex3 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(1574, ushort.MaxValue + 1));
68+
Assert.StartsWith("Generation number must not be a negative value, and less or equal to 65,535.", ex3.Message);
69+
}
70+
3371
[Fact]
3472
public void TwoIndirectReferenceNotEqual()
3573
{

src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,6 @@ private static bool CheckObjectKeys(IInputBytes bytes, IndirectReference objectK
9494
long objectGen = objectKey.Generation;
9595
var originOffset = bytes.CurrentOffset;
9696

97-
var objectString = ObjectHelper.CreateObjectString(objectNr, objectGen);
98-
9997
try
10098
{
10199
if (offset >= bytes.Length)
@@ -111,7 +109,7 @@ private static bool CheckObjectKeys(IInputBytes bytes, IndirectReference objectK
111109
bytes.MoveNext();
112110
}
113111

114-
if (ReadHelper.IsString(bytes, objectString))
112+
if (ReadHelper.IsString(bytes, ObjectHelper.CreateObjectString(objectNr, objectGen)))
115113
{
116114
// everything is ok, return origin object key
117115
bytes.Seek(originOffset);

0 commit comments

Comments
 (0)