diff --git a/API.Tests/Services/BookServiceTests.cs b/API.Tests/Services/BookServiceTests.cs index 23716e2f74..4e726e469c 100644 --- a/API.Tests/Services/BookServiceTests.cs +++ b/API.Tests/Services/BookServiceTests.cs @@ -81,4 +81,46 @@ public void ShouldParseAsVolumeGroup_WithSeriesIndex() Assert.Equal("Accel World", comicInfo.Series); } + [Fact] + public void ShouldHaveComicInfoForPDF() + { + var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/BookService"); + var document = Path.Join(testDirectory, "test.pdf"); + var comicInfo = _bookService.GetComicInfo(document); + Assert.NotNull(comicInfo); + Assert.Equal("Variations Chromatiques de concert", comicInfo.Title); + Assert.Equal("Georges Bizet \\(1838-1875\\)", comicInfo.Writer); + } + + [Fact] + public void ShouldUsePDFInfoDict() + { + var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/Library/Books/PDFs"); + var document = Path.Join(testDirectory, "Rollo at Work SP01.pdf"); + var comicInfo = _bookService.GetComicInfo(document); + Assert.NotNull(comicInfo); + Assert.Equal("Rollo at Work", comicInfo.Title); + Assert.Equal("Jacob Abbott", comicInfo.Writer); + Assert.Equal(2008, comicInfo.Year); + } + + [Fact] + public void ShouldHandleIndirectPDFObjects() + { + var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/BookService"); + var document = Path.Join(testDirectory, "indirect.pdf"); + var comicInfo = _bookService.GetComicInfo(document); + Assert.NotNull(comicInfo); + Assert.Equal(2018, comicInfo.Year); + Assert.Equal(8, comicInfo.Month); + } + + [Fact] + public void FailGracefullyWithEncryptedPDF() + { + var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/BookService"); + var document = Path.Join(testDirectory, "encrypted.pdf"); + var comicInfo = _bookService.GetComicInfo(document); + Assert.Null(comicInfo); + } } diff --git a/API.Tests/Services/Test Data/BookService/encrypted.pdf b/API.Tests/Services/Test Data/BookService/encrypted.pdf new file mode 100644 index 0000000000..64249b7288 Binary files /dev/null and b/API.Tests/Services/Test Data/BookService/encrypted.pdf differ diff --git a/API.Tests/Services/Test Data/BookService/indirect.pdf b/API.Tests/Services/Test Data/BookService/indirect.pdf new file mode 100644 index 0000000000..11ecdcb763 Binary files /dev/null and b/API.Tests/Services/Test Data/BookService/indirect.pdf differ diff --git a/API/Helpers/PdfComicInfoExtractor.cs b/API/Helpers/PdfComicInfoExtractor.cs new file mode 100644 index 0000000000..f01a256041 --- /dev/null +++ b/API/Helpers/PdfComicInfoExtractor.cs @@ -0,0 +1,159 @@ +/// Translate PDF metadata (See PdfMetadataExtractor.cs) into ComicInfo structure. + +// Contributed by https://github.com/microtherion + +// All references to the "PDF Spec" (section numbers, etc) refer to the +// PDF 1.7 Specification a.k.a. PDF32000-1:2008 +// https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf + +using System; +using System.Xml; +using System.Text; +using System.IO; +using System.Diagnostics; +using API.Data.Metadata; +using API.Entities.Enums; +using API.Services; +using API.Services.Tasks.Scanner.Parser; +using Microsoft.Extensions.Logging; +using Nager.ArticleNumber; +using System.Collections.Generic; + +namespace API.Helpers; +#nullable enable + +public interface IPdfComicInfoExtractor +{ + ComicInfo? GetComicInfo(string filePath); +} + +public class PdfComicInfoExtractor : IPdfComicInfoExtractor +{ + private readonly ILogger _logger; + private readonly IMediaErrorService _mediaErrorService; + private readonly string[] _pdfDateFormats = [ // PDF Spec 7.9.4 + "D:yyyyMMddHHmmsszzz:", "D:yyyyMMddHHmmss+", "D:yyyyMMddHHmmss", + "D:yyyyMMddHHmmzzz:", "D:yyyyMMddHHmm+", "D:yyyyMMddHHmm", + "D:yyyyMMddHHzzz:", "D:yyyyMMddHH+", "D:yyyyMMddHH", + "D:yyyyMMdd", "D:yyyyMM", "D:yyyy" + ]; + + public PdfComicInfoExtractor(ILogger logger, IMediaErrorService mediaErrorService) + { + _logger = logger; + _mediaErrorService = mediaErrorService; + } + + private float? GetFloatFromText(string? text) + { + if (string.IsNullOrEmpty(text)) return null; + + if (float.TryParse(text, out var value)) return value; + + return null; + } + + private DateTime? GetDateTimeFromText(string? text) + { + if (string.IsNullOrEmpty(text)) return null; + + // Dates stored in the XMP metadata stream (PDF Spec 14.3.2) + // are stored in ISO 8601 format, which is handled by C# out of the box + if (DateTime.TryParse(text, out var date)) return date; + + // Dates stored in the document information directory (PDF Spec 14.3.3) + // are stored in a proprietary format (PDF Spec 7.9.4) that needs to be + // massaged slightly to be expressible by a DateTime format. + if (text[0] != 'D') { + text = "D:" + text; + } + text = text.Replace("'", ":"); + text = text.Replace("Z", "+"); + + foreach(var format in _pdfDateFormats) + { + if (DateTime.TryParseExact(text, format, null, System.Globalization.DateTimeStyles.None, out var pdfDate)) return pdfDate; + } + + return null; + } + + private string? MaybeGetMetadata(Dictionary metadata, string key) + { + return metadata.ContainsKey(key) ? metadata[key] : null; + } + + private ComicInfo? GetComicInfoFromMetadata(Dictionary metadata, string filePath) + { + var info = new ComicInfo(); + + var publicationDate = GetDateTimeFromText(MaybeGetMetadata(metadata, "CreationDate")); + + if (publicationDate != null) + { + info.Year = publicationDate.Value.Year; + info.Month = publicationDate.Value.Month; + info.Day = publicationDate.Value.Day; + } + + info.Summary = MaybeGetMetadata(metadata, "Summary") ?? string.Empty; + info.Publisher = MaybeGetMetadata(metadata, "Publisher") ?? string.Empty; + info.Writer = MaybeGetMetadata(metadata, "Author") ?? string.Empty; + info.Title = MaybeGetMetadata(metadata, "Title") ?? string.Empty; + info.Genre = MaybeGetMetadata(metadata, "Subject") ?? string.Empty; + info.LanguageISO = BookService.ValidateLanguage(MaybeGetMetadata(metadata, "Language")); + info.Isbn = MaybeGetMetadata(metadata, "ISBN") ?? string.Empty; + + if (info.Isbn != string.Empty && !ArticleNumberHelper.IsValidIsbn10(info.Isbn) && !ArticleNumberHelper.IsValidIsbn13(info.Isbn)) + { + _logger.LogDebug("[BookService] {File} has an invalid ISBN number", filePath); + info.Isbn = string.Empty; + } + + info.UserRating = GetFloatFromText(MaybeGetMetadata(metadata, "UserRating")) ?? 0.0f; + info.TitleSort = MaybeGetMetadata(metadata, "TitleSort") ?? string.Empty; + info.Series = MaybeGetMetadata(metadata, "Series") ?? info.TitleSort; + info.SeriesSort = info.Series; + info.Volume = (GetFloatFromText(MaybeGetMetadata(metadata, "Volume")) ?? 0.0f).ToString(); + + // If this is a single book and not a collection, set publication status to Completed + if (string.IsNullOrEmpty(info.Volume) && Parser.ParseVolume(filePath, LibraryType.Manga).Equals(Parser.LooseLeafVolume)) + { + info.Count = 1; + } + + // Removed as probably unneeded per discussion in https://github.com/Kareadita/Kavita/pull/3108#discussion_r1956747782 + // + // var hasVolumeInSeries = !Parser.ParseVolume(info.Title, LibraryType.Manga) + // .Equals(Parser.LooseLeafVolume); + + // if (string.IsNullOrEmpty(info.Volume) && hasVolumeInSeries && (!info.Series.Equals(info.Title) || string.IsNullOrEmpty(info.Series))) + // { + // // This is likely a light novel for which we can set series from parsed title + // info.Series = Parser.ParseSeries(info.Title, LibraryType.Manga); + // info.Volume = Parser.ParseVolume(info.Title, LibraryType.Manga); + // } + + ComicInfo.CleanComicInfo(info); + + return info; + } + + public ComicInfo? GetComicInfo(string filePath) + { + try + { + var extractor = new PdfMetadataExtractor(_logger, filePath); + + return GetComicInfoFromMetadata(extractor.GetMetadata(), filePath); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "[GetComicInfo] There was an exception parsing PDF metadata for {File}", filePath); + _mediaErrorService.ReportMediaIssue(filePath, MediaErrorProducer.BookService, + "There was an exception parsing PDF metadata", ex); + } + + return null; + } +} \ No newline at end of file diff --git a/API/Helpers/PdfMetadataExtractor.cs b/API/Helpers/PdfMetadataExtractor.cs new file mode 100644 index 0000000000..d33c59890f --- /dev/null +++ b/API/Helpers/PdfMetadataExtractor.cs @@ -0,0 +1,1662 @@ +/// Parse PDF file and try to extract as much metadata as possible. +/// Supports both text based XRef tables and compressed XRef streams (Deflate only). +/// Supports both UTF-16 and PDFDocEncoding for strings. +/// Lacks support for many PDF configurations that are theoretically possible, but should handle most common cases. + +// Contributed by https://github.com/microtherion + +// All references to the "PDF Spec" (section numbers, etc) refer to the +// PDF 1.7 Specification a.k.a. PDF32000-1:2008 +// https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO.Compression; +using System.Reflection.Metadata.Ecma335; +using System.Runtime.InteropServices; +using System.Security.Principal; +using System.Text; +using System.Xml; +using System.IO; +using Microsoft.Extensions.Logging; +using API.Services; + +namespace API.Helpers; +#nullable enable + +public class PdfMetadataExtractorException : Exception +{ + public PdfMetadataExtractorException() + { + } + + public PdfMetadataExtractorException(string message) + : base(message) + { + } + + public PdfMetadataExtractorException(string message, Exception inner) + : base(message, inner) + { + } +} + +public interface IPdfMetadataExtractor +{ + Dictionary GetMetadata(); +} + +class PdfStringBuilder +{ + private readonly StringBuilder _builder = new(); + private bool _secondByte = false; + private byte _prevByte = 0; + private bool _isUnicode = false; + + // PDFDocEncoding defined in PDF Spec D.1 + + private readonly char[] _pdfDocMappingLow = new char[] { + '\u02D8', '\u02C7', '\u02C6', '\u02D9', '\u02DD', '\u02DB', '\u02DA', '\u02DC', + }; + + private readonly char[] _pdfDocMappingHigh = new char[] { + '\u2022', '\u2020', '\u2021', '\u2026', '\u2014', '\u2013', '\u0192', '\u2044', + '\u2039', '\u203A', '\u2212', '\u2030', '\u201E', '\u201C', '\u201D', '\u2018', + '\u2019', '\u201A', '\u2122', '\uFB01', '\uFB02', '\u0141', '\u0152', '\u0160', + '\u0178', '\u017D', '\u0131', '\u0142', '\u0153', '\u0161', '\u017E', ' ', + '\u20AC', + }; + + public void AppendPdfDocByte(byte b) + { + if (b >= 0x18 && b < 0x20) + { + _builder.Append(_pdfDocMappingLow[b - 0x18]); + } + else if (b >= 0x80 && b < 0xA1) + { + _builder.Append(_pdfDocMappingHigh[b - 0x80]); + } + else + { + _builder.Append((char)b); + } + } + + public void Append(char c) + { + _builder.Append(c); + } + + public void AppendByte(byte b) + { + // PDF Spec 7.9.2.1: Strings are either UTF-16BE or PDFDocEncoded + if (_builder.Length == 0 && !_isUnicode) + { + // Unicode strings are prefixed by a big endian BOM \uFEFF + if (_secondByte) + { + if (b == 0xFF) + { + _isUnicode = true; + _secondByte = false; + } + else + { + AppendPdfDocByte(_prevByte); + AppendPdfDocByte(b); + } + } + else if (!_secondByte && b == 0xFE) + { + _secondByte = true; + _prevByte = b; + } + else + { + AppendPdfDocByte(b); + } + } + else if (_isUnicode) + { + if (_secondByte) + { + _builder.Append((char)(((char)_prevByte) << 8 | (char)b)); + _secondByte = false; + } + else + { + _prevByte = b; + _secondByte = true; + } + } + else + { + AppendPdfDocByte(b); + } + } + + override public string ToString() + { + if (_builder.Length == 0 && _secondByte) + { + AppendPdfDocByte(_prevByte); + } + + return _builder.ToString(); + } +} + +class PdfLexer(Stream stream) +{ + public enum TokenType + { + None, + Bool, + Int, + Double, + Name, + String, + ArrayStart, + ArrayEnd, + DictionaryStart, + DictionaryEnd, + StreamStart, + StreamEnd, + ObjectStart, + ObjectEnd, + ObjectRef, + Keyword, + Newline, + } + + public struct Token + { + public TokenType type; + public object value; + + public Token(TokenType type, object value) + { + this.type = type; + this.value = value; + } + } + + public Token NextToken(bool reportNewlines = false) + { + while (true) + { + switch ((char)NextByte()) + { + case '\n' when reportNewlines: + return new Token(TokenType.Newline, true); + + case '\r' when reportNewlines: + if (NextByte() != '\n') + { + PutBack(); + } + return new Token(TokenType.Newline, true); + + case ' ': + case '\x00': + case '\t': + case '\n': + case '\f': + case '\r': + continue; // Skip whitespace + + case '%': + SkipComment(); + continue; + + case '+': + case '-': + case '.': + case >= '0' and <= '9': + return ScanNumber(); + + case '/': + return ScanName(); + + case '(': + return ScanString(); + + case '[': + return new Token(TokenType.ArrayStart, true); + + case ']': + return new Token(TokenType.ArrayEnd, true); + + case '<': + if (NextByte() == '<') + { + return new Token(TokenType.DictionaryStart, true); + } + else + { + PutBack(); + return ScanHexString(); + } + case '>': + ExpectByte((byte)'>'); + + return new Token(TokenType.DictionaryEnd, true); + + case >= 'a' and <= 'z': + case >= 'A' and <= 'Z': + return ScanKeyword(); + + default: + throw new PdfMetadataExtractorException("Unexpected byte, got {LastByte()}"); + } + } + } + + public void ResetBuffer() + { + _pos = 0; + _valid = 0; + } + + public bool TestByte(byte expected) + { + var result = NextByte() == expected; + + PutBack(); + + return result; + } + + public void ExpectNewline() + { + while (true) + { + byte b = NextByte(); + switch ((char)b) + { + case ' ': + case '\t': + case '\f': + continue; // Skip whitespace + + case '\n': + return; + + case '\r': + if (NextByte() != '\n') + { + PutBack(); + } + + return; + + default: + throw new PdfMetadataExtractorException("Unexpected character, expected newline, got {b}"); + } + } + } + + public long GetXRefStart() + { + // Look for the startxref element as per PDF Spec 7.5.5 + while (true) + { + byte b = NextByte(); + + switch ((char)b) + { + case '\r': + b = NextByte(); + + if (b != '\n') + { + PutBack(); + } + + goto case '\n'; + + case '\n': + // Handle consecutive newlines + while (true) + { + b = NextByte(); + + if (b == '\r') + { + goto case '\r'; + } + else if (b == '\n') + { + goto case '\n'; + } + else if (b == ' ' || b == '\t' || b == '\f') + { + continue; + } + else + { + PutBack(); + + break; + } + } + + var token = NextToken(true); + + if (token.type == TokenType.Keyword && (string)token.value == "startxref") + { + token = NextToken(); + + if (token.type == TokenType.Int) + { + return (long)token.value; + } + else + { + throw new PdfMetadataExtractorException("Expected integer after startxref keyword"); + } + } + + continue; + + default: + continue; + } + } + } + + public bool NextXRefEntry(ref long obj, ref int generation) + { + // Cross-reference table entry as per PDF Spec 7.5.4 + + WantLookahead(20); + + if (_valid - _pos < 20) + { + throw new PdfMetadataExtractorException("End of stream"); + } + + var inUse = true; + + if (obj == 0) + { + obj = Convert.ToInt64(System.Text.Encoding.ASCII.GetString(_buffer, _pos, 10)); + generation = Convert.ToInt32(System.Text.Encoding.ASCII.GetString(_buffer, _pos + 11, 5)); + inUse = _buffer[_pos + 17] == 'n'; + } + + _pos += 20; + + return inUse; + } + + public Stream StreamObject(int length, bool deflate) + { + // Read a stream object as per PDF Spec 7.3.8 + // At the moment, we only accept uncompressed streams or the FlateDecode (PDF Spec 7.4.1) filter + // with no parameters. These cover the vast majority of streams we're interested in. + + var rawData = new MemoryStream(); + + ExpectNewline(); + + if (_pos < _valid) + { + int buffered = Math.Min(_valid - _pos, length); + rawData.Write(_buffer, _pos, buffered); + length -= buffered; + _pos += buffered; + } + + while (length > 0) + { + int buffered = Math.Min(length, _bufferSize); + stream.Read(_buffer, 0, buffered); + rawData.Write(_buffer, 0, buffered); + _pos = 0; + _valid = 0; + length -= buffered; + } + + rawData.Seek(0, SeekOrigin.Begin); + + if (deflate) + { + return new ZLibStream(rawData, CompressionMode.Decompress, false); + } + else + { + return rawData; + } + } + + private const int _bufferSize = 1024; + private readonly byte[] _buffer = new byte[_bufferSize]; + private int _pos = 0; + private int _valid = 0; + + private byte NextByte() + { + if (_pos >= _valid) + { + _pos = 0; + _valid = stream.Read(_buffer, 0, _bufferSize); + + if (_valid <= 0) + { + throw new PdfMetadataExtractorException("End of stream"); + } + } + + return _buffer[_pos++]; + } + + private byte LastByte() + { + return _buffer[_pos - 1]; + } + + private void PutBack() + { + --_pos; + } + + private void ExpectByte(byte expected) + { + if (NextByte() != expected) + { + throw new PdfMetadataExtractorException($"Unexpected character, expected {expected}"); + } + } + + private void WantLookahead(int length) + { + if (_pos + length > _valid) + { + Buffer.BlockCopy(_buffer, _pos, _buffer, 0, _valid - _pos); + _valid -= _pos; + _pos = 0; + _valid += stream.Read(_buffer, _valid, _bufferSize - _valid); + } + } + + private void SkipComment() + { + while (true) + { + byte b = NextByte(); + + if (b == '\n') + { + break; + } + else if (b == '\r') + { + if (NextByte() != '\n') + { + PutBack(); + } + + break; + } + } + } + + private Token ScanNumber() + { + StringBuilder sb = new(); + bool hasDot = LastByte() == '.'; + bool followedBySpace = false; + + sb.Append((char)LastByte()); + + while (true) + { + byte b = NextByte(); + + if (b == '.' || b >= '0' && b <= '9') + { + sb.Append((char)b); + + if (b == '.') + { + hasDot = true; + } + } + else + { + followedBySpace = (b == ' ' || b == '\t'); + PutBack(); + + break; + } + } + if (hasDot) + { + return new Token(TokenType.Double, double.Parse(sb.ToString())); + } + if (followedBySpace) + { + // Look ahead to see if it's an object reference (PDF Spec 7.3.10) + WantLookahead(32); + + var savedPos = _pos; + byte b = NextByte(); + + while (b == ' ' || b == '\t') + { + b = NextByte(); + } + + // Generation number (ignored) + while (b >= '0' && b <= '9') + { + b = NextByte(); + } + + while (b == ' ' || b == '\t') + { + b = NextByte(); + } + + if (b == 'R') + { + return new Token(TokenType.ObjectRef, long.Parse(sb.ToString())); + } + else if (b == 'o' && NextByte() == 'b' && NextByte() == 'j') + { + return new Token(TokenType.ObjectStart, long.Parse(sb.ToString())); + } + else + { + _pos = savedPos; + } + } + + return new Token(TokenType.Int, long.Parse(sb.ToString())); + } + + private int HexDigit(byte b) + { + switch ((char)b) + { + case >= '0' and <= '9': + return b - (byte)'0'; + + case >= 'a' and <= 'f': + return b - (byte)'a' + 10; + + case >= 'A' and <= 'F': + return b - (byte)'A' + 10; + + default: + throw new PdfMetadataExtractorException("Invalid hex digit, got {b}"); + } + } + + private Token ScanName() + { + // PDF Spec 7.3.5 + + StringBuilder sb = new StringBuilder(); + while (true) + { + byte b = NextByte(); + switch ((char)b) + { + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '<': + case '>': + case '/': + case '%': + PutBack(); + + goto case ' '; + + case ' ': + case '\t': + case '\n': + case '\f': + case '\r': + return new Token(TokenType.Name, sb.ToString()); + + case '#': + byte b1 = NextByte(); + byte b2 = NextByte(); + b = (byte)((HexDigit(b1) << 4) | HexDigit(b2)); + + goto default; + + default: + sb.Append((char)b); + break; + } + } + } + + private Token ScanString() + { + // PDF Spec 7.3.4.2 + + PdfStringBuilder sb = new(); + int parenLevel = 1; + + while (true) + { + byte b = NextByte(); + + switch ((char)b) + { + case '(': + parenLevel++; + + goto default; + + case ')': + if (--parenLevel == 0) + { + return new Token(TokenType.String, sb.ToString()); + } + + goto default; + + case '\\': + b = NextByte(); + + switch ((char)b) + { + case 'b': + sb.Append('\b'); + + break; + + case 'f': + sb.Append('\f'); + + break; + + case 'n': + sb.Append('\n'); + + break; + + case 'r': + sb.Append('\r'); + + break; + + case 't': + sb.Append('\t'); + + break; + + case >= '0' and <= '7': + byte b1 = b; + byte b2 = NextByte(); + byte b3 = NextByte(); + + if (b2 < '0' || b2 > '7' || b3 < '0' || b3 > '7') + { + throw new PdfMetadataExtractorException("Invalid octal escape, got {b1}{b2}{b3}"); + } + + sb.AppendByte((byte)((b1 - '0') << 6 | (b2 - '0') << 3 | (b3 - '0'))); + + break; + } + break; + + default: + sb.AppendByte(b); + break; + } + } + } + + private Token ScanHexString() + { + // PDF Spec 7.3.4.3 + + PdfStringBuilder sb = new(); + + while (true) + { + byte b = NextByte(); + + switch ((char)b) + { + case (>= '0' and <= '9') or (>= 'a' and <= 'f') or (>= 'A' and <= 'F'): + byte b1 = NextByte(); + if (b1 == '>') + { + PutBack(); + b1 = (byte)'0'; + } + sb.AppendByte((byte)(HexDigit(b) << 4 | HexDigit(b1))); + + break; + + case '>': + return new Token(TokenType.String, sb.ToString()); + + default: + throw new PdfMetadataExtractorException("Invalid hex string, got {b}"); + } + } + } + + private Token ScanKeyword() + { + StringBuilder sb = new(); + + sb.Append((char)LastByte()); + + while (true) + { + byte b = NextByte(); + if ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) + { + sb.Append((char)b); + } + else + { + PutBack(); + + break; + } + } + + switch (sb.ToString()) + { + case "true": + return new Token(TokenType.Bool, true); + + case "false": + return new Token(TokenType.Bool, false); + + case "stream": + return new Token(TokenType.StreamStart, true); + + case "endstream": + return new Token(TokenType.StreamEnd, true); + + case "endobj": + return new Token(TokenType.ObjectEnd, true); + + default: + return new Token(TokenType.Keyword, sb.ToString()); + } + } +} + +class PdfMetadataExtractor : IPdfMetadataExtractor +{ + private readonly ILogger _logger; + private readonly PdfLexer _lexer; + private readonly FileStream _stream; + private long[] _objectOffsets = new long[0]; + private readonly Dictionary _metadata = new(); + + private struct MetadataRef + { + public long root; + public long info; + + public MetadataRef(long root, long info) + { + this.root = root; + this.info = info; + } + } + + private readonly Stack metadataRef = new(); + + private struct XRefSection + { + public long first; + public long count; + + public XRefSection(long first, long count) + { + this.first = first; + this.count = count; + } + } + + public PdfMetadataExtractor(ILogger logger, string filename) + { + _logger = logger; + _stream = File.OpenRead(filename); + _lexer = new PdfLexer(_stream); + + ReadObjectOffsets(); + ReadMetadata(filename); + + LogMetadata(filename); + } + + public Dictionary GetMetadata() + { + return _metadata; + } + + private void LogMetadata(string filename) + { + _logger.LogTrace("Metadata for {Path}:", filename); + + foreach (var entry in _metadata) + { + _logger.LogTrace(" {Key:0,-5} : {Value:1}", entry.Key, entry.Value); + } + } + + private void ReadObjectOffsets() + { + // Look for file trailer (PDF Spec 7.5.5) + // Spec says trailer must be strictly at end of file. + // Adobe software accepts trailer within last 1K of EOF, + // but in practice, virtually all PDFs have trailer at end. + + _stream.Seek(-32, SeekOrigin.End); + + long xrefOffset = _lexer.GetXRefStart(); + + ReadXRefAndTrailer(xrefOffset); + } + + private void ReadXRefAndTrailer(long xrefOffset) + { + _stream.Seek(xrefOffset, SeekOrigin.Begin); + _lexer.ResetBuffer(); + + if (!_lexer.TestByte((byte)'x')) + { + // Cross-reference stream (PDF Spec 7.5.8) + + ReadXRefStream(); + + return; + } + + // Cross-reference table (PDF Spec 7.5.4) + + var token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.Keyword || (string)token.value != "xref") + { + throw new PdfMetadataExtractorException("Expected xref keyword"); + } + + while (true) + { + token = _lexer.NextToken(); + + if (token.type == PdfLexer.TokenType.Int) + { + long startObj = (long)token.value; + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected number of objects in xref subsection"); + } + + long numObj = (long)token.value; + + if (_objectOffsets.Length < startObj + numObj) + { + Array.Resize(ref _objectOffsets, (int)(startObj + numObj)); + } + + _lexer.ExpectNewline(); + + int generation = 0; + + for (var obj = startObj; obj < startObj + numObj; ++obj) + { + bool inUse = _lexer.NextXRefEntry(ref _objectOffsets[obj], ref generation); + + if (!inUse) + { + _objectOffsets[obj] = 0; + } + } + } + else if (token.type == PdfLexer.TokenType.Keyword && (string)token.value == "trailer") + { + break; + } + else + { + throw new PdfMetadataExtractorException("Unexpected token in xref"); + } + } + + ReadTrailerDictionary(); + } + + private void ReadXRefStream() + { + // Cross-reference stream (PDF Spec 7.5.8) + + var token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.ObjectStart) + { + throw new PdfMetadataExtractorException("Expected obj keyword"); + } + + long length = -1; + long size = -1; + bool deflate = false; + long prev = -1; + long typeWidth = -1; + long offsetWidth = -1; + long generationWidth = -1; + Queue sections = new(); + MetadataRef meta = new MetadataRef(-1, -1); + + // Cross-reference stream dictionary (PDF Spec 7.5.8.2) + + ParseDictionary(delegate(string key, PdfLexer.Token value) { + switch (key) + { + case "Type": + if (value.type != PdfLexer.TokenType.Name || (string)value.value != "XRef") + { + throw new PdfMetadataExtractorException("Expected /Type to be /XRef"); + } + + return true; + + case "Length": + if (value.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected integer after /Length"); + } + + length = (long)value.value; + + return true; + + case "Size": + if (value.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected integer after /Size"); + } + + size = (long)value.value; + + return true; + + case "Prev": + if (value.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected offset after /Prev"); + } + + prev = (long)value.value; + + return true; + + case "Index": + if (value.type != PdfLexer.TokenType.ArrayStart) + { + throw new PdfMetadataExtractorException("Expected array after /Index"); + } + + while (true) + { + token = _lexer.NextToken(); + + if (token.type == PdfLexer.TokenType.ArrayEnd) + { + break; + } + else if (token.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected integer in /Index array"); + } + + long first = (long)token.value; + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected integer pair in /Index array"); + } + + long count = (long)token.value; + sections.Enqueue(new XRefSection(first, count)); + } + + return true; + + case "W": + if (value.type != PdfLexer.TokenType.ArrayStart) + { + throw new PdfMetadataExtractorException("Expected array after /W"); + } + + long[] widths = new long[3]; + + for (int i = 0; i < 3; ++i) + { + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected integer in /W array"); + } + + widths[i] = (long)token.value; + } + + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.ArrayEnd) + { + throw new PdfMetadataExtractorException("Unclosed array after /W"); + } + + typeWidth = widths[0]; + offsetWidth = widths[1]; + generationWidth = widths[2]; + + return true; + + case "Filter": + if (value.type != PdfLexer.TokenType.Name) + { + throw new PdfMetadataExtractorException("Expected name after /Filter"); + } + + if ((string)value.value != "FlateDecode") + { + throw new PdfMetadataExtractorException("Unsupported filter, only FlateDecode is supported"); + } + + deflate = true; + + return true; + + case "Root": + if (value.type != PdfLexer.TokenType.ObjectRef) + { + throw new PdfMetadataExtractorException("Expected object reference after /Root"); + } + + meta.root = (long)value.value; + + return true; + + case "Info": + if (value.type != PdfLexer.TokenType.ObjectRef) + { + throw new PdfMetadataExtractorException("Expected object reference after /Info"); + } + + meta.info = (long)value.value; + + return true; + + default: + return false; + } + }); + + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.StreamStart) + { + throw new PdfMetadataExtractorException("Expected xref stream after dictionary"); + } + + var stream = _lexer.StreamObject((int)length, deflate); + + if (sections.Count == 0) + { + sections.Enqueue(new XRefSection(0, size)); + } + + while (sections.Count > 0) + { + var section = sections.Dequeue(); + + if (_objectOffsets.Length < size) + { + Array.Resize(ref _objectOffsets, (int)size); + } + + for (long i = section.first; i < section.first + section.count; ++i) + { + long type = 0; + long offset = 0; + long generation = 0; + + if (typeWidth == 0) + { + type = 1; + } + + for (int j = 0; j < typeWidth; ++j) + { + type = (type << 8) | (UInt16)stream.ReadByte(); + } + + for (int j = 0; j < offsetWidth; ++j) + { + offset = (offset << 8) | (UInt16)stream.ReadByte(); + } + + for (int j = 0; j < generationWidth; ++j) + { + generation = (generation << 8) | (UInt16)stream.ReadByte(); + } + + if (type == 1 && _objectOffsets[i] == 0) + { + _objectOffsets[i] = offset; + } + } + } + + if (prev > -1) + { + ReadXRefAndTrailer(prev); + } + + PushMetadataRef(meta); + } + + private void PushMetadataRef(MetadataRef meta) + { + if (metadataRef.Count > 0) + { + if (meta.root == metadataRef.Peek().root) + { + meta.root = -1; + } + + if (meta.info == metadataRef.Peek().info) + { + meta.info = -1; + } + } + + if (meta.root != -1 || meta.info != -1) + { + metadataRef.Push(meta); + } + } + + private void ReadTrailerDictionary() + { + // Read trailer directory (PDF Spec 7.5.5) + + long prev = -1; + long xrefStm = -1; + + MetadataRef meta = new(-1, -1); + + ParseDictionary(delegate(string key, PdfLexer.Token value) + { + switch (key) + { + case "Root": + if (value.type != PdfLexer.TokenType.ObjectRef) + { + throw new PdfMetadataExtractorException("Expected object reference after /Root"); + } + + meta.root = (long)value.value; + + return true; + case "Prev": + if (value.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected offset after /Prev"); + } + + prev = (long)value.value; + + return true; + case "Info": + if (value.type != PdfLexer.TokenType.ObjectRef) + { + throw new PdfMetadataExtractorException("Expected object reference after /Info"); + } + + meta.info = (long)value.value; + + return true; + case "XRefStm": + // Prefer encoded xref stream over xref table + if (value.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected offset after /XRefStm"); + } + + xrefStm = (long)value.value; + + return true; + + case "Encrypt": + throw new PdfMetadataExtractorException("Encryption not supported"); + + default: + return false; + } + }); + + PushMetadataRef(meta); + + if (xrefStm != -1) + { + ReadXRefAndTrailer(xrefStm); + } + + if (prev != -1) + { + ReadXRefAndTrailer(prev); + } + } + + private void ReadMetadata(string filename) + { + // We read potential metadata sources in backwards historical order, so + // we can overwrite to our heart's content + + while (metadataRef.Count > 0) + { + var meta = metadataRef.Pop(); + + _logger.LogTrace("DocumentCatalog for {Path}: {Root}, Info: {Info}", filename, meta.root, meta.info); + + ReadMetadataFromInfo(meta.info); + ReadMetadataFromXML(MetadataObjInObjectCatalog(meta.root)); + } + } + + private void ReadMetadataFromInfo(long infoObj) + { + // Document information dictionary (PDF Spec 14.3.3) + // We treat this as less authoritative than the Metadata stream. + + if (infoObj < 1 || infoObj >= _objectOffsets.Length || _objectOffsets[infoObj] == 0) + { + return; + } + + _stream.Seek(_objectOffsets[infoObj], SeekOrigin.Begin); + _lexer.ResetBuffer(); + + var token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.ObjectStart) + { + throw new PdfMetadataExtractorException("Expected object header"); + } + + Dictionary indirectObjects = new(); + + ParseDictionary(delegate(string key, PdfLexer.Token value) + { + switch (key) + { + case "Title": + case "Author": + case "Subject": + case "Keywords": + case "Creator": + case "Producer": + case "CreationDate": + case "ModDate": + if (value.type == PdfLexer.TokenType.ObjectRef) { + indirectObjects[key] = (long)value.value; + } + else if (value.type != PdfLexer.TokenType.String) + { + throw new PdfMetadataExtractorException("Expected string value"); + } + else + { + _metadata[key] = (string)value.value; + } + + return true; + + default: + return false; + } + }); + + // Resolve indirectly referenced values + foreach(var key in indirectObjects.Keys) { + _stream.Seek(_objectOffsets[indirectObjects[key]], SeekOrigin.Begin); + _lexer.ResetBuffer(); + + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.ObjectStart) { + throw new PdfMetadataExtractorException("Expected object here"); + } + + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.String) { + throw new PdfMetadataExtractorException("Expected string"); + } + + _metadata[key] = (string)token.value; + } + } + + private long MetadataObjInObjectCatalog(long rootObj) + { + // Look for /Metadata entry in document catalog (PDF Spec 7.7.2) + + if (rootObj < 1 || rootObj >= _objectOffsets.Length || _objectOffsets[rootObj] == 0) + { + return -1; + } + + _stream.Seek(_objectOffsets[rootObj], SeekOrigin.Begin); + _lexer.ResetBuffer(); + + var token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.ObjectStart) + { + throw new PdfMetadataExtractorException("Expected object header"); + } + + long meta = -1; + + ParseDictionary(delegate(string key, PdfLexer.Token value) + { + switch (key) { + case "Metadata": + if (value.type != PdfLexer.TokenType.ObjectRef) + { + throw new PdfMetadataExtractorException("Expected object number after /Metadata"); + } + + meta = (long)value.value; + + return true; + + default: + return false; + } + }); + + return meta; + } + + // Obtain metadata from XMP stream object + // See XMP specification: https://developer.adobe.com/xmp/docs/XMPSpecifications/ + // and Dublin Core: https://www.dublincore.org/specifications/dublin-core/ + + private string? GetTextFromXmlNode(XmlDocument doc, XmlNamespaceManager ns, string path) + { + return (doc.DocumentElement?.SelectSingleNode(path + "//rdf:li", ns) + ?? doc.DocumentElement?.SelectSingleNode(path, ns))?.InnerText; + } + + private string? GetListFromXmlNode(XmlDocument doc, XmlNamespaceManager ns, string path) + { + var nodes = doc.DocumentElement?.SelectNodes(path + "//rdf:li", ns); + + if (nodes == null) return null; + + var list = new StringBuilder(); + + foreach (XmlNode n in nodes) + { + if (list.Length > 0) + { + list.Append(","); + } + + list.Append(n.InnerText); + } + + return list.Length > 0 ? list.ToString() : null; + } + + private void SetMetadata(string key, string? value) + { + if (value == null) return; + + _metadata[key] = value; + } + + private void ReadMetadataFromXML(long meta) + { + if (meta < 1 || meta >= _objectOffsets.Length || _objectOffsets[meta] == 0) return; + + _stream.Seek(_objectOffsets[meta], SeekOrigin.Begin); + _lexer.ResetBuffer(); + + var token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.ObjectStart) + { + throw new PdfMetadataExtractorException("Expected object header"); + } + + long length = -1; + bool deflate = false; + + // Metadata stream dictionary (PDF Spec 14.3.2) + + ParseDictionary(delegate(string key, PdfLexer.Token value) + { + switch (key) { + case "Type": + if (value.type != PdfLexer.TokenType.Name || (string)value.value != "Metadata") + { + throw new PdfMetadataExtractorException("Expected /Type to be /Metadata"); + } + + return true; + + case "Subtype": + if (value.type != PdfLexer.TokenType.Name || (string)value.value != "XML") + { + throw new PdfMetadataExtractorException("Expected /Subtype to be /XML"); + } + + return true; + + case "Length": + if (value.type != PdfLexer.TokenType.Int) + { + throw new PdfMetadataExtractorException("Expected integer after /Length"); + } + + length = (long)value.value; + + return true; + + case "Filter": + if (value.type != PdfLexer.TokenType.Name) + { + throw new PdfMetadataExtractorException("Expected name after /Filter"); + } + + if ((string)value.value != "FlateDecode") + { + throw new PdfMetadataExtractorException("Unsupported filter, only FlateDecode is supported"); + } + + deflate = true; + + return true; + + default: + return false; + } + }); + + token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.StreamStart) + { + throw new PdfMetadataExtractorException("Expected xref stream after dictionary"); + } + + var xmlStream = _lexer.StreamObject((int)length, deflate); + + // Skip XMP header + while (true) { + var b = xmlStream.ReadByte(); + + if (b < 0) { + throw new PdfMetadataExtractorException("Reached EOF in XMP header"); + } + + if (b == '?') { + while (b == '?') { + b = xmlStream.ReadByte(); + } + + if (b == '>') { + break; + } + } + } + + var metaDoc = new XmlDocument(); + metaDoc.Load(xmlStream); + + var ns = new XmlNamespaceManager(metaDoc.NameTable); + ns.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + ns.AddNamespace("dc", "http://purl.org/dc/elements/1.1/"); + ns.AddNamespace("calibreSI", "http://calibre-ebook.com/xmp-namespace-series-index"); + ns.AddNamespace("calibre", "http://calibre-ebook.com/xmp-namespace"); + ns.AddNamespace("pdfx", "http://ns.adobe.com/pdfx/1.3/"); + ns.AddNamespace("prism", "http://prismstandard.org/namespaces/basic/2.0/"); + ns.AddNamespace("xmp", "http://ns.adobe.com/xap/1.0/"); + + SetMetadata("CreationDate", + GetTextFromXmlNode(metaDoc, ns, "//dc:date") + ?? GetTextFromXmlNode(metaDoc, ns, "//xmp:CreateDate")); + SetMetadata("Summary", GetTextFromXmlNode(metaDoc, ns, "//dc:description")); + SetMetadata("Publisher", GetTextFromXmlNode(metaDoc, ns, "//dc:publisher")); + SetMetadata("Author", GetListFromXmlNode(metaDoc, ns, "//dc:creator")); + SetMetadata("Title", GetTextFromXmlNode(metaDoc, ns, "//dc:title")); + SetMetadata("Subject", GetListFromXmlNode(metaDoc, ns, "//dc:subject")); + SetMetadata("Language", GetTextFromXmlNode(metaDoc, ns, "//dc:language")); + SetMetadata("ISBN", GetTextFromXmlNode(metaDoc, ns, "//pdfx:isbn") ?? GetTextFromXmlNode(metaDoc, ns, "//prism:isbn")); + SetMetadata("UserRating", GetTextFromXmlNode(metaDoc, ns, "//calibre:rating")); + SetMetadata("TitleSort", GetTextFromXmlNode(metaDoc, ns, "//calibre:title_sort")); + SetMetadata("Series", GetTextFromXmlNode(metaDoc, ns, "//calibre:series/rdf:value")); + SetMetadata("Volume", GetTextFromXmlNode(metaDoc, ns, "//calibreSI:series_index")); + } + + private delegate bool DictionaryHandler(string key, PdfLexer.Token value); + + private void ParseDictionary(DictionaryHandler handler) + { + var token = _lexer.NextToken(); + + if (token.type != PdfLexer.TokenType.DictionaryStart) + { + throw new PdfMetadataExtractorException("Expected dictionary"); + } + + while (true) + { + token = _lexer.NextToken(); + + if (token.type == PdfLexer.TokenType.DictionaryEnd) + { + return; + } + else if (token.type == PdfLexer.TokenType.Name) + { + var value = _lexer.NextToken(); + + if (!handler((string)token.value, value)) { + SkipValue(value); + } + } + else + { + throw new PdfMetadataExtractorException("Improper token in dictionary"); + } + } + } + + private void SkipValue(PdfLexer.Token? existingToken = null) + { + var token = existingToken ?? _lexer.NextToken(); + + switch (token.type) + { + case PdfLexer.TokenType.Bool: + case PdfLexer.TokenType.Int: + case PdfLexer.TokenType.Double: + case PdfLexer.TokenType.Name: + case PdfLexer.TokenType.String: + case PdfLexer.TokenType.ObjectRef: + break; + + case PdfLexer.TokenType.ArrayStart: + SkipArray(); + + break; + + case PdfLexer.TokenType.DictionaryStart: + SkipDictionary(); + + break; + + default: + throw new PdfMetadataExtractorException("Unexpected token in SkipValue"); + } + } + + private void SkipArray() + { + while (true) + { + var token = _lexer.NextToken(); + + if (token.type == PdfLexer.TokenType.ArrayEnd) + { + break; + } + + SkipValue(token); + } + } + + private void SkipDictionary() + { + while (true) + { + var token = _lexer.NextToken(); + + if (token.type == PdfLexer.TokenType.DictionaryEnd) + { + break; + } + else if (token.type != PdfLexer.TokenType.Name) + { + throw new PdfMetadataExtractorException("Expected name in dictionary"); + } + + SkipValue(); + } + } +} diff --git a/API/Services/BookService.cs b/API/Services/BookService.cs index 740227af88..55b652e1f6 100644 --- a/API/Services/BookService.cs +++ b/API/Services/BookService.cs @@ -6,12 +6,14 @@ using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; +using System.Xml; using API.Data.Metadata; using API.DTOs.Reader; using API.Entities; using API.Entities.Enums; using API.Extensions; using API.Services.Tasks.Scanner.Parser; +using API.Helpers; using Docnet.Core; using Docnet.Core.Converters; using Docnet.Core.Models; @@ -69,6 +71,8 @@ public class BookService : IBookService private static readonly RecyclableMemoryStreamManager StreamManager = new (); private const string CssScopeClass = ".book-content"; private const string BookApiUrl = "book-resources?file="; + private readonly PdfComicInfoExtractor _pdfComicInfoExtractor; + public static readonly EpubReaderOptions BookReaderOptions = new() { PackageReaderOptions = new PackageReaderOptions @@ -84,6 +88,7 @@ public BookService(ILogger logger, IDirectoryService directoryServi _directoryService = directoryService; _imageService = imageService; _mediaErrorService = mediaErrorService; + _pdfComicInfoExtractor = new PdfComicInfoExtractor(_logger, _mediaErrorService); } private static bool HasClickableHrefPart(HtmlNode anchor) @@ -425,10 +430,8 @@ await _mediaErrorService.ReportMediaIssueAsync(book.FilePath, MediaErrorProducer } } - public ComicInfo? GetComicInfo(string filePath) + private ComicInfo? GetEpubComicInfo(string filePath) { - if (!IsValidFile(filePath) || Parser.IsPdf(filePath)) return null; - try { using var epubBook = EpubReader.OpenBook(filePath, BookReaderOptions); @@ -442,7 +445,7 @@ await _mediaErrorService.ReportMediaIssueAsync(book.FilePath, MediaErrorProducer var (year, month, day) = GetPublicationDate(publicationDate); var summary = epubBook.Schema.Package.Metadata.Descriptions.FirstOrDefault(); - var info = new ComicInfo + var info = new ComicInfo { Summary = string.IsNullOrEmpty(summary?.Description) ? string.Empty : summary.Description, Publisher = string.Join(",", epubBook.Schema.Package.Metadata.Publishers.Select(p => p.Publisher)), @@ -583,6 +586,20 @@ await _mediaErrorService.ReportMediaIssueAsync(book.FilePath, MediaErrorProducer return null; } + public ComicInfo? GetComicInfo(string filePath) + { + if (!IsValidFile(filePath)) return null; + + if (Parser.IsPdf(filePath)) + { + return _pdfComicInfoExtractor.GetComicInfo(filePath); + } + else + { + return GetEpubComicInfo(filePath); + } + } + private static void ExtractSortTitle(EpubMetadataMeta metadataItem, EpubBookRef epubBook, ComicInfo info) { var titleId = metadataItem.Refines?.Replace("#", string.Empty); @@ -685,7 +702,7 @@ private static (int year, int month, int day) GetPublicationDate(string? publica return (year, month, day); } - private static string ValidateLanguage(string? language) + public static string ValidateLanguage(string? language) { if (string.IsNullOrEmpty(language)) return string.Empty; diff --git a/API/Services/ReadingItemService.cs b/API/Services/ReadingItemService.cs index 5816907332..efdaec8ff5 100644 --- a/API/Services/ReadingItemService.cs +++ b/API/Services/ReadingItemService.cs @@ -52,7 +52,7 @@ public ReadingItemService(IArchiveService archiveService, IBookService bookServi /// private ComicInfo? GetComicInfo(string filePath) { - if (Parser.IsEpub(filePath)) + if (Parser.IsEpub(filePath) || Parser.IsPdf(filePath)) { return _bookService.GetComicInfo(filePath); } diff --git a/API/Services/Tasks/Scanner/Parser/PdfParser.cs b/API/Services/Tasks/Scanner/Parser/PdfParser.cs index 696a618670..3a5debcbd8 100644 --- a/API/Services/Tasks/Scanner/Parser/PdfParser.cs +++ b/API/Services/Tasks/Scanner/Parser/PdfParser.cs @@ -68,6 +68,9 @@ public override ParserInfo Parse(string filePath, string rootPath, string librar ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret); } + // Patch in other information from ComicInfo + UpdateFromComicInfo(ret); + if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && type == LibraryType.Book) { ret.IsSpecial = true;