This repository has been archived by the owner on Oct 29, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpdf.go
146 lines (129 loc) · 3.92 KB
/
pdf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Package pdf extracts metadata from PDF files.
package pdf
import (
"bytes"
"github.com/datatogether/xmp"
unicommon "github.com/unidoc/unidoc/common"
unilicense "github.com/unidoc/unidoc/common/license"
pdf "github.com/unidoc/unidoc/pdf/core"
"io"
"os"
)
// MetadataForFile generates metadata from a filepath.
//
// It opens the file at the given path and hands it to extract. The returned
// error is either the one reported by os.Open or whatever extract returns.
// The result is whatever extract produces for the file's contents, which may
// be a nil map with a nil error.
func MetadataForFile(file string) (map[string]interface{}, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	// extract keeps its parser local and hands back only a plain map, so
	// nothing reads from the file after it returns. Close the handle on the
	// way out instead of leaking a descriptor on every call. The Close error
	// is deliberately ignored: the file was opened read-only.
	defer f.Close()

	return extract(f)
}
// MetadataForBytes generates metadata from a byte slice.
//
// The slice is wrapped in a bytes.Reader so that it satisfies the
// io.ReadSeeker that extract expects; the result and error are passed
// through from extract unchanged.
func MetadataForBytes(data []byte) (map[string]interface{}, error) {
	rdr := bytes.NewReader(data)
	return extract(rdr)
}
// extract scans the objects of a PDF for the document catalog and returns
// the XMP metadata packet that the catalog's Metadata entry refers to.
//
// Objects are looked up by number, starting at 1. The scan stops at the first
// lookup that fails or that yields the null object, so a gap in the object
// numbering ends the search early.
//
// It returns (nil, nil) when no catalog with a usable metadata stream is
// found. A non-nil error is returned when the parser cannot be created, when
// the object referenced by Metadata cannot be looked up, or when the XMP
// packet cannot be decoded.
func extract(r io.ReadSeeker) (map[string]interface{}, error) {
	p, err := pdf.NewParser(r)
	if err != nil {
		return nil, err
	}

	for i := 1; ; i++ {
		o, err := p.LookupByNumber(i)
		// The nil check is purely defensive: it keeps o.String() from being
		// called on a nil interface should a lookup ever return (nil, nil).
		if err != nil || o == nil || o.String() == "null" {
			break
		}

		// Only indirect objects wrapping a dictionary can be the catalog.
		iobj, ok := o.(*pdf.PdfIndirectObject)
		if !ok {
			continue
		}
		dict, ok := iobj.PdfObject.(*pdf.PdfObjectDictionary)
		if !ok {
			continue
		}

		// Skip dictionaries without a Type name, or with a Type other than
		// Catalog.
		ot, ok := dict.Get("Type").(*pdf.PdfObjectName)
		if !ok || ot.String() != "Catalog" {
			continue
		}

		// TODO - check pdf spec, is only one metadata entry allowed?
		//
		// Use a checked assertion: a catalog with no Metadata entry, or one
		// whose Metadata is not an indirect reference, is skipped rather than
		// panicking on malformed input.
		ref, ok := dict.Get("Metadata").(*pdf.PdfObjectReference)
		if !ok {
			continue
		}

		obj, err := p.LookupByNumber(int(ref.ObjectNumber))
		if err != nil {
			return nil, err
		}

		// A Metadata reference that does not resolve to a stream is ignored
		// and the scan continues with the next object.
		sobj, ok := obj.(*pdf.PdfObjectStream)
		if !ok {
			continue
		}

		packet, err := xmp.Unmarshal(sobj.Stream)
		if err != nil {
			return nil, err
		}
		return packet.AsPOD().AsObject()
	}

	return nil, nil
}
// extract pulls metadata from a pdf reader
// func extract(r *pdf.Reader) (map[string]interface{}, error) {
// for i := 1; i <= r.NumPage(); i++ {
// fmt.Printf("interpret page %d\n", i)
// pdf.Interpret(r.Page(i).Resources(), func(stk *pdf.Stack, op string) {
// fmt.Println(op)
// fmt.Println(stk)
// })
// fmt.Printf("interpreted page %d\n", i)
// }
// return nil, nil
// }
// init configures the UniDoc library when the package is loaded, without
// supplying a license key.
func init() {
	// With an empty key initUniDoc skips license registration and always
	// returns nil, so there is no error to act on here.
	_ = initUniDoc("")
}
// initUniDoc prepares the UniDoc library for use.
//
// When licenseKey is non-empty it is registered via unilicense.SetLicenseKey,
// and any error from that call is returned as-is. In every other case the
// function installs a unicommon.DummyLogger as the library logger and
// returns nil.
func initUniDoc(licenseKey string) error {
	if licenseKey != "" {
		if err := unilicense.SetLicenseKey(licenseKey); err != nil {
			return err
		}
	}

	// The library logs through whatever logger satisfying unicommon.Logger is
	// installed here. DummyLogger is the library default and discards all
	// output; swap in your own implementation to get log messages.
	unicommon.SetLogger(unicommon.DummyLogger{})
	return nil
}