Skip to content

Commit d8fd776

Browse files
committed
Use warcfields helper functions to ensure consistent conversion of values.
1 parent c1f3185 commit d8fd776

File tree

7 files changed

+42
-28
lines changed

7 files changed

+42
-28
lines changed

example_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ func Example_basic() {
3535
if wr, v, err := builder.Build(); err == nil {
3636
fmt.Println(wr, v)
3737
}
38-
// Output: WARC record: version: WARC/1.1, type: response, id: <urn:uuid:e9a0cecc-0221-11e7-adb1-0242ac120008>
38+
// Output: WARC record: version: WARC/1.1, type: response, id: urn:uuid:e9a0cecc-0221-11e7-adb1-0242ac120008
3939
}

headerfielddef.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ func validateHeader(wf *WarcFields, version *WarcVersion, validation *Validation
9595
}
9696
}
9797
}
98-
contentLength, _ := strconv.ParseInt(wf.Get(ContentLength), 10, 64)
98+
contentLength, _ := wf.GetInt64(ContentLength)
9999
if rt != Continuation && contentLength > 0 && !wf.Has(ContentType) {
100100
switch opts.errSpec {
101101
case ErrWarn:

record.go

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"io"
2222
"strconv"
2323
"strings"
24+
"time"
2425
)
2526

2627
const (
@@ -38,6 +39,9 @@ type WarcRecord interface {
3839
Type() RecordType
3940
WarcHeader() *WarcFields
4041
Block() Block
42+
RecordId() string
43+
ContentLength() (int64, error)
44+
Date() (time.Time, error)
4145
String() string
4246
io.Closer
4347
// ToRevisitRecord takes RevisitRef referencing the record we want to make a revisit of and returns a revisit record.
@@ -192,8 +196,20 @@ func (wr *warcRecord) Block() Block {
192196
return wr.block
193197
}
194198

199+
func (wr *warcRecord) RecordId() string {
200+
return wr.headers.GetId(WarcRecordID)
201+
}
202+
203+
func (wr *warcRecord) ContentLength() (int64, error) {
204+
return wr.headers.GetInt64(ContentLength)
205+
}
206+
207+
func (wr *warcRecord) Date() (time.Time, error) {
208+
return wr.headers.GetTime(WarcDate)
209+
}
210+
195211
func (wr *warcRecord) String() string {
196-
return fmt.Sprintf("WARC record: version: %s, type: %s, id: %s", wr.version, wr.Type(), wr.WarcHeader().Get(WarcRecordID))
212+
return fmt.Sprintf("WARC record: version: %s, type: %s, id: %s", wr.version, wr.Type(), wr.WarcHeader().GetId(WarcRecordID))
197213
}
198214

199215
func (wr *warcRecord) Close() error {
@@ -227,7 +243,7 @@ func (wr *warcRecord) ToRevisitRecord(ref *RevisitRef) (WarcRecord, error) {
227243
h.Set(WarcType, Revisit.String())
228244
h.Set(WarcProfile, ref.Profile)
229245
if ref.TargetRecordId != "" {
230-
h.Set(WarcRefersTo, ref.TargetRecordId)
246+
h.SetId(WarcRefersTo, ref.TargetRecordId)
231247
}
232248
if ref.TargetUri != "" {
233249
h.Set(WarcRefersToTargetURI, ref.TargetUri)
@@ -242,7 +258,7 @@ func (wr *warcRecord) ToRevisitRecord(ref *RevisitRef) (WarcRecord, error) {
242258
return nil, err
243259
}
244260
h.Set(WarcBlockDigest, block.BlockDigest())
245-
h.Set(ContentLength, strconv.Itoa(len(block.headerBytes)))
261+
h.SetInt(ContentLength, len(block.headerBytes))
246262

247263
revisit := &warcRecord{
248264
opts: wr.opts,
@@ -261,7 +277,7 @@ func (wr *warcRecord) RevisitRef() (*RevisitRef, error) {
261277

262278
return &RevisitRef{
263279
Profile: wr.headers.Get(WarcProfile),
264-
TargetRecordId: wr.headers.Get(WarcRefersTo),
280+
TargetRecordId: wr.headers.GetId(WarcRefersTo),
265281
TargetUri: wr.headers.Get(WarcRefersToTargetURI),
266282
TargetDate: wr.headers.Get(WarcRefersToDate),
267283
}, nil
@@ -274,7 +290,7 @@ func (wr *warcRecord) CreateRevisitRef(profile string) (*RevisitRef, error) {
274290

275291
return &RevisitRef{
276292
Profile: profile,
277-
TargetRecordId: wr.headers.Get(WarcRecordID),
293+
TargetRecordId: wr.headers.GetId(WarcRecordID),
278294
TargetUri: wr.headers.Get(WarcTargetURI),
279295
TargetDate: wr.headers.Get(WarcDate),
280296
}, nil
@@ -309,21 +325,21 @@ func (wr *warcRecord) Merge(record ...WarcRecord) (WarcRecord, error) {
309325
}
310326
switch v := record[0].Block().(type) {
311327
case *httpRequestBlock:
312-
refLen, err := strconv.ParseInt(record[0].WarcHeader().Get(ContentLength), 10, 64)
328+
refLen, err := record[0].WarcHeader().GetInt64(ContentLength)
313329
if err != nil {
314330
return nil, fmt.Errorf("could not parse %s", ContentLength)
315331
}
316332
size := int64(len(b.headerBytes)) + refLen - int64(len(v.httpHeaderBytes))
317-
wr.headers.Set(ContentLength, strconv.FormatInt(size, 10))
333+
wr.headers.SetInt64(ContentLength, size)
318334
v.httpHeaderBytes = b.headerBytes
319335
wr.block = v
320336
case *httpResponseBlock:
321-
refLen, err := strconv.ParseInt(record[0].WarcHeader().Get(ContentLength), 10, 64)
337+
refLen, err := record[0].WarcHeader().GetInt64(ContentLength)
322338
if err != nil {
323339
return nil, fmt.Errorf("could not parse %s", ContentLength)
324340
}
325341
size := int64(len(b.headerBytes)) + refLen - int64(len(v.httpHeaderBytes))
326-
wr.headers.Set(ContentLength, strconv.FormatInt(size, 10))
342+
wr.headers.SetInt64(ContentLength, size)
327343
v.httpHeaderBytes = b.headerBytes
328344
wr.block = v
329345
default:

record_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) {
6060
&nameValue{Name: ContentType, Value: "application/http;msgtype=response"},
6161
&nameValue{Name: ContentLength, Value: "238"},
6262
&nameValue{Name: WarcProfile, Value: ProfileServerNotModifiedV1_1},
63-
&nameValue{Name: WarcRefersTo, Value: "targetId"},
63+
&nameValue{Name: WarcRefersTo, Value: "<targetId>"},
6464
&nameValue{Name: WarcTruncated, Value: "length"},
6565
},
6666
"HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n" +
@@ -92,7 +92,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) {
9292
&nameValue{Name: ContentType, Value: "application/http;msgtype=response"},
9393
&nameValue{Name: ContentLength, Value: "238"},
9494
&nameValue{Name: WarcProfile, Value: ProfileServerNotModifiedV1_1},
95-
&nameValue{Name: WarcRefersTo, Value: "targetId"},
95+
&nameValue{Name: WarcRefersTo, Value: "<targetId>"},
9696
&nameValue{Name: WarcTruncated, Value: "length"},
9797
},
9898
"HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n" +
@@ -126,7 +126,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) {
126126
&nameValue{Name: ContentType, Value: "application/http;msgtype=response"},
127127
&nameValue{Name: ContentLength, Value: "238"},
128128
&nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1},
129-
&nameValue{Name: WarcRefersTo, Value: "targetId"},
129+
&nameValue{Name: WarcRefersTo, Value: "<targetId>"},
130130
&nameValue{Name: WarcTruncated, Value: "length"},
131131
},
132132
"HTTP/1.1 200 OK\nDate: Tue, 19 Sep 2016 17:18:40 GMT\nServer: Apache/2.0.54 (Ubuntu)\n" +
@@ -174,7 +174,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) {
174174
&nameValue{Name: ContentType, Value: "text/plain"},
175175
&nameValue{Name: ContentLength, Value: "0"},
176176
&nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1},
177-
&nameValue{Name: WarcRefersTo, Value: "targetId"},
177+
&nameValue{Name: WarcRefersTo, Value: "<targetId>"},
178178
&nameValue{Name: WarcTruncated, Value: "length"},
179179
},
180180
"",
@@ -204,7 +204,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) {
204204
&nameValue{Name: ContentType, Value: "text/plain"},
205205
&nameValue{Name: ContentLength, Value: "0"},
206206
&nameValue{Name: WarcProfile, Value: ProfileIdenticalPayloadDigestV1_1},
207-
&nameValue{Name: WarcRefersTo, Value: "targetId"},
207+
&nameValue{Name: WarcRefersTo, Value: "<targetId>"},
208208
&nameValue{Name: WarcTruncated, Value: "length"},
209209
},
210210
"",

recordbuilder.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package gowarc
1919
import (
2020
"github.com/nlnwa/gowarc/internal/diskbuffer"
2121
"io"
22-
"strconv"
2322
"time"
2423
)
2524

@@ -70,17 +69,17 @@ func (rb *recordBuilder) AddWarcHeader(name string, value string) {
7069

7170
// AddWarcHeaderInt adds a new WARC header field with the given name and an int value to the record
7271
func (rb *recordBuilder) AddWarcHeaderInt(name string, value int) {
73-
rb.headers.Add(name, strconv.Itoa(value))
72+
rb.headers.AddInt(name, value)
7473
}
7574

7675
// AddWarcHeaderInt64 adds a new WARC header field with the given name and an int64 value to the record
7776
func (rb *recordBuilder) AddWarcHeaderInt64(name string, value int64) {
78-
rb.headers.Add(name, strconv.FormatInt(value, 10))
77+
rb.headers.AddInt64(name, value)
7978
}
8079

8180
// AddWarcHeaderTime adds a new WARC header field with the given name and a time.Time value to the record
8281
func (rb *recordBuilder) AddWarcHeaderTime(name string, value time.Time) {
83-
rb.headers.Add(name, value.UTC().Format(time.RFC3339))
82+
rb.headers.AddTime(name, value)
8483
}
8584

8685
// Close releases resources used by the WarcRecordBuilder
@@ -109,7 +108,7 @@ func (rb *recordBuilder) Build() (WarcRecord, *Validation, error) {
109108
if id, err := rb.opts.recordIdFunc(); err != nil {
110109
return nil, nil, err
111110
} else {
112-
rb.headers.Set(WarcRecordID, "<"+id+">")
111+
rb.headers.SetId(WarcRecordID, id)
113112
}
114113
}
115114

@@ -139,9 +138,9 @@ func (rb *recordBuilder) Build() (WarcRecord, *Validation, error) {
139138
}
140139

141140
func (rb *recordBuilder) validate(wr *warcRecord) (*Validation, error) {
142-
size := strconv.FormatInt(rb.content.Size(), 10)
141+
size := rb.content.Size()
143142
if rb.opts.addMissingContentLength && !wr.WarcHeader().Has(ContentLength) {
144-
wr.headers.Set(ContentLength, size)
143+
wr.headers.SetInt64(ContentLength, size)
145144
}
146145

147146
validation := &Validation{}

unmarshaler.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"github.com/nlnwa/gowarc/internal/countingreader"
2525
"io"
2626
"io/ioutil"
27-
"strconv"
2827
)
2928

3029
type Unmarshaler interface {
@@ -142,7 +141,7 @@ func (u *unmarshaler) Unmarshal(b *bufio.Reader) (WarcRecord, int64, *Validation
142141
closer: nil,
143142
}
144143

145-
length, _ := strconv.ParseInt(record.headers.Get(ContentLength), 10, 64)
144+
length, _ := record.headers.GetInt64(ContentLength)
146145

147146
content := countingreader.NewLimited(r, length)
148147
record.closer = func() error {

warcfile.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ func (w *WarcFileWriter) createWriteJob(record ...WarcRecord) (*job, <-chan []Wr
227227
if k == k2 {
228228
continue
229229
}
230-
wr.WarcHeader().Add(WarcConcurrentTo, wr2.WarcHeader().Get(WarcRecordID))
230+
wr.WarcHeader().AddId(WarcConcurrentTo, wr2.WarcHeader().GetId(WarcRecordID))
231231
}
232232
}
233233
}
@@ -384,7 +384,7 @@ func (w *singleWarcFileWriter) writeRecord(writer io.Writer, record WarcRecord,
384384
writer = w.gz
385385
}
386386
if w.currentWarcInfoId != "" {
387-
record.WarcHeader().Set(WarcWarcinfoID, w.currentWarcInfoId)
387+
record.WarcHeader().SetId(WarcWarcinfoID, w.currentWarcInfoId)
388388
}
389389
nextRec, size, err := w.opts.marshaler.Marshal(writer, record, maxRecordSize)
390390
if err != nil {
@@ -417,7 +417,7 @@ func (w *singleWarcFileWriter) createWarcInfoRecord(fileName string) (int64, err
417417
if err != nil {
418418
return 0, err
419419
}
420-
w.currentWarcInfoId = warcinfo.WarcHeader().Get(WarcRecordID)
420+
w.currentWarcInfoId = warcinfo.WarcHeader().GetId(WarcRecordID)
421421
if w.opts.flush {
422422
// sync file to reduce possibility of half written records in case of crash
423423
if err := w.currentFile.Sync(); err != nil {

0 commit comments

Comments
 (0)