Commit e0bfc75

Merge pull request #3 from thatInfrastructureGuy/feature/addToQueue
Feature/add to queue
2 parents 1d7c44a + e216b54 commit e0bfc75

5 files changed: 243 additions, 245 deletions

config.go

Lines changed: 29 additions & 27 deletions
@@ -1,6 +1,7 @@
 package sqltocsvgzip
 
 import (
+	"bytes"
 	"compress/flate"
 	"database/sql"
 	"os"
@@ -12,15 +13,26 @@ const (
 	minFileSize = 5 * 1024 * 1024
 )
 
-type s3Obj struct {
+type obj struct {
 	partNumber int64
 	buf        []byte
 }
 
+type LogLevel int
+
+const (
+	Error   LogLevel = 1
+	Warn    LogLevel = 2
+	Info    LogLevel = 3
+	Debug   LogLevel = 4
+	Verbose LogLevel = 5
+)
+
 // Converter does the actual work of converting the rows to CSV.
 // There are a few settings you can override if you want to do
 // some fancy stuff to your CSV.
 type Converter struct {
+	LogLevel     LogLevel
 	Headers      []string // Column headers to use (default is rows.Columns())
 	WriteHeaders bool     // Flag to output headers in your CSV (default is true)
 	TimeFormat   string   // Format string for any time.Time values (default is time's default)
@@ -30,22 +42,23 @@ type Converter struct {
 	GzipGoroutines        int
 	GzipBatchPerGoroutine int
 	SingleThreaded        bool
-	Debug                 bool
 	S3Bucket              string
 	S3Region              string
 	S3Acl                 string
 	S3Path                string
 	S3Upload              bool
-	S3UploadThreads       int
-	S3UploadMaxPartSize   int64
+	UploadThreads         int
+	UploadPartSize        int
 
 	s3Svc            *s3.S3
 	s3Resp           *s3.CreateMultipartUploadOutput
-	s3Uploadable     chan *s3Obj
 	s3CompletedParts []*s3.CompletedPart
 	rows             *sql.Rows
 	rowPreProcessor  CsvPreProcessorFunc
-	gzipBuf          []byte
+	gzipBuf          *bytes.Buffer
+	partNumber       int64
+	uploadQ          chan *obj
+	quit             chan bool
 }
 
 // CsvPreprocessorFunc is a function type for preprocessing your CSV.
@@ -70,40 +83,29 @@ func New(rows *sql.Rows) *Converter {
 		WriteHeaders:          true,
 		Delimiter:             ',',
 		CompressionLevel:      flate.DefaultCompression,
-		GzipGoroutines:        6,
-		GzipBatchPerGoroutine: 180000,
+		GzipGoroutines:        10,
+		GzipBatchPerGoroutine: 100000,
+		UploadPartSize:        5 * 1024 * 1025, // Should be greater than 1 * 1024 * 1024 for pgzip
+		LogLevel:              Info,
 	}
 }
 
-// DefaultConfig sets the following variables.
-//
-// WriteHeaders: true,
-// Delimiter: ',',
-// CompressionLevel: flate.DefaultCompression,
-// GzipGoroutines: 6,
-// GzipBatchPerGoroutine: 180000,
-// S3Upload: true,
-// S3UploadThreads: 6,
-// S3UploadMaxPartSize: 5 * 1024 * 1025, // Should be greater than 5 * 1024 * 1024
-// S3Bucket: os.Getenv("S3_BUCKET"),
-// S3Path: os.Getenv("S3_PATH"),
-// S3Region: os.Getenv("S3_REGION"),
-// S3Acl: os.Getenv("S3_ACL"), // If empty, defaults to bucket-owner-full-control
-//
+// DefaultConfig sets the default values for Converter struct.
 func DefaultConfig(rows *sql.Rows) *Converter {
 	return &Converter{
 		rows:                  rows,
 		WriteHeaders:          true,
 		Delimiter:             ',',
 		CompressionLevel:      flate.DefaultCompression,
-		GzipGoroutines:        6,
-		GzipBatchPerGoroutine: 180000,
+		GzipGoroutines:        10,
+		GzipBatchPerGoroutine: 100000,
 		S3Upload:              true,
-		S3UploadThreads:       6,
-		S3UploadMaxPartSize:   5 * 1024 * 1025, // Should be greater than 5 * 1024 * 1024
+		UploadThreads:         6,
+		UploadPartSize:        5 * 1024 * 1025, // Should be greater than 5 * 1024 * 1024 for s3 upload
 		S3Bucket:              os.Getenv("S3_BUCKET"),
 		S3Path:                os.Getenv("S3_PATH"),
 		S3Region:              os.Getenv("S3_REGION"),
 		S3Acl:                 os.Getenv("S3_ACL"),
+		LogLevel:              Info,
 	}
 }
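
For orientation, a caller-side sketch using only names visible in this diff; the query and the final conversion call are hypothetical, since the entry points are outside this commit:

package main

import (
	"database/sql"
	"log"

	"github.com/thatInfrastructureGuy/sqltocsvgzip"
)

func main() {
	var db *sql.DB // assume an opened *sql.DB
	rows, err := db.Query("SELECT * FROM events") // hypothetical query
	if err != nil {
		log.Fatal(err)
	}
	c := sqltocsvgzip.DefaultConfig(rows)
	c.LogLevel = sqltocsvgzip.Debug     // new field in this commit
	c.UploadThreads = 6                 // renamed from S3UploadThreads
	c.UploadPartSize = 10 * 1024 * 1024 // renamed from S3UploadMaxPartSize, now int
	_ = c // hand c to the conversion entry point (not shown in this diff)
}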

csv.go

Lines changed: 17 additions & 0 deletions
@@ -1,10 +1,27 @@
 package sqltocsvgzip
 
 import (
+	"bytes"
+	"encoding/csv"
 	"fmt"
 	"time"
 )
 
+func (c *Converter) getCSVWriter() (*csv.Writer, *bytes.Buffer) {
+	// Same size as sqlRowBatch
+	var csvBuffer bytes.Buffer
+
+	// CSV writer to csvBuffer
+	csvWriter := csv.NewWriter(&csvBuffer)
+
+	// Set delimiter
+	if c.Delimiter != '\x00' {
+		csvWriter.Comma = c.Delimiter
+	}
+
+	return csvWriter, &csvBuffer
+}
+
 func (c *Converter) setCSVHeaders() ([]string, int, error) {
 	var headers []string
 	columnNames, err := c.rows.Columns()
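
A minimal sketch of the intended call pattern for the new helper, using only the standard encoding/csv and bytes APIs; the wrapper method itself is hypothetical:

// writeOneRecord is a hypothetical caller illustrating getCSVWriter.
func (c *Converter) writeOneRecord(record []string) ([]byte, error) {
	w, buf := c.getCSVWriter()
	if err := w.Write(record); err != nil {
		return nil, err
	}
	// Flush pushes the buffered record into csvBuffer; Error reports
	// any error that occurred during a previous Write or Flush.
	w.Flush()
	if err := w.Error(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil // gzip-ready CSV bytes
}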

getter.go

Lines changed: 3 additions & 24 deletions
@@ -9,38 +9,17 @@ import (
 
 // getSqlBatchSize gets the size of rows to be retrieved.
 // This batch is worked upon entirely before flushing to disk.
-func (c *Converter) getSqlBatchSize(totalColumns int) int {
+func (c *Converter) getSqlBatchSize(totalColumns int) {
 	// Use sqlBatchSize set by user
 	if c.SqlBatchSize != 0 {
-		return c.SqlBatchSize
+		return
 	}
 
 	// Default to 4096
 	c.SqlBatchSize = 4096
-
-	// Use Default value when Single thread.
-	if c.SingleThreaded {
-		return c.SqlBatchSize
-	}
-
-	// If Multi-threaded, then block size should be atleast 1Mb = 1048576 bytes
-	// See https://github.com/klauspost/pgzip
-
-	// (String X SqlBatchSize X TotalColumns) > 1048576
-	// String = 16 bytes
-	// (SqlBatchSize X TotalColumns) > 65536
-
-	for (c.SqlBatchSize * totalColumns) <= 65536 {
-		c.SqlBatchSize = c.SqlBatchSize * 2
-	}
-
-	// We aim for 1.5 MB - 2 MB to be on a safe side
-	c.SqlBatchSize = c.SqlBatchSize * 2
-
-	return c.SqlBatchSize
 }
 
-func (c *Converter) selectCompressionMethod(writer io.Writer) (io.WriteCloser, error) {
+func (c *Converter) getGzipWriter(writer io.Writer) (io.WriteCloser, error) {
 	// Use gzip if single threaded
 	if c.SingleThreaded {
 		zw, err := gzip.NewWriterLevel(writer, c.CompressionLevel)
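
The multi-threaded branch of getGzipWriter falls outside this hunk. A sketch of what it plausibly contains, assuming the klauspost/pgzip package that the deleted comment links to (pgzip mirrors compress/gzip and adds SetConcurrency(blockSize, blocks)):

// Hypothetical multi-threaded branch, not shown in this diff.
zw, err := pgzip.NewWriterLevel(writer, c.CompressionLevel)
if err != nil {
	return nil, err
}
err = zw.SetConcurrency(c.GzipBatchPerGoroutine, c.GzipGoroutines)
return zw, err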

s3.go

Lines changed: 20 additions & 11 deletions
@@ -3,8 +3,8 @@ package sqltocsvgzip
 import (
 	"bytes"
 	"fmt"
-	"log"
-	"os"
+	"io"
+	"net/url"
 	"sync"
 
 	"github.com/aws/aws-sdk-go/aws"
@@ -35,7 +35,7 @@ func (c *Converter) createMultipartRequest() (err error) {
 		return err
 	}
 
-	log.Println("Created multipart upload request.")
+	c.writeLog(Info, "Created multipart upload request.")
 	return nil
 }
 
@@ -59,7 +59,7 @@ func (c *Converter) createS3Session() error {
 }
 
 func (c *Converter) abortMultipartUpload() error {
-	log.Println("Aborting multipart upload for UploadId: " + *c.s3Resp.UploadId)
+	c.writeLog(Info, "Aborting multipart upload for UploadId: "+aws.StringValue(c.s3Resp.UploadId))
 	abortInput := &s3.AbortMultipartUploadInput{
 		Bucket: c.s3Resp.Bucket,
 		Key:    c.s3Resp.Key,
@@ -70,7 +70,7 @@ func (c *Converter) abortMultipartUpload() error {
 }
 
 func (c *Converter) completeMultipartUpload() (*s3.CompleteMultipartUploadOutput, error) {
-	log.Println("Completing multipart upload for UploadId: " + *c.s3Resp.UploadId)
+	c.writeLog(Info, "Completing multipart upload for UploadId: "+aws.StringValue(c.s3Resp.UploadId))
 	completeInput := &s3.CompleteMultipartUploadInput{
 		Bucket: c.s3Resp.Bucket,
 		Key:    c.s3Resp.Key,
@@ -95,17 +95,17 @@ func (c *Converter) uploadPart(partNumber int64, buf []byte, mu *sync.RWMutex) (
 	for tryNum <= maxRetries {
 		uploadResult, err := c.s3Svc.UploadPart(partInput)
 		if err != nil {
-			log.Println(err)
+			c.writeLog(Error, err.Error())
 			if tryNum == maxRetries {
 				if aerr, ok := err.(awserr.Error); ok {
 					return aerr
 				}
 				return err
 			}
-			log.Println("Retrying to upload part: #", partNumber)
+			c.writeLog(Info, fmt.Sprintf("Retrying to upload part: #%v", partNumber))
 			tryNum++
 		} else {
-			log.Println("Uploaded part: #", partNumber)
+			c.writeLog(Info, fmt.Sprintf("Uploaded part: #%v", partNumber))
 			mu.Lock()
 			c.s3CompletedParts = append(c.s3CompletedParts, &s3.CompletedPart{
 				ETag:       uploadResult.ETag,
@@ -118,7 +118,12 @@
 	return nil
 }
 
-func (c *Converter) UploadObjectToS3(f *os.File) error {
+func (c *Converter) UploadObjectToS3(w io.Writer) error {
+	buf, ok := w.(*bytes.Buffer)
+	if !ok {
+		return fmt.Errorf("Expected buffer. Got %T", w)
+	}
+
 	fileType := "application/x-gzip"
 
 	// The session the S3 Uploader will use
@@ -135,12 +140,16 @@ func (c *Converter) UploadObjectToS3(f *os.File) error {
 		Key:         aws.String(c.S3Path),
 		ACL:         aws.String(c.S3Acl),
 		ContentType: aws.String(fileType),
-		Body:        f,
+		Body:        bytes.NewReader(buf.Bytes()),
 	})
 	if err != nil {
 		return err
 	}
 
-	log.Println(res.Location)
+	uploadPath, err := url.PathUnescape(res.Location)
+	if err != nil {
+		return err
+	}
+	c.writeLog(Info, "Successfully uploaded file: "+uploadPath)
 	return nil
 }
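
The writeLog helper these call sites depend on is not defined in any hunk shown here; it presumably lives in the fifth changed file. A minimal sketch consistent with the call sites and the LogLevel ordering in config.go (Error = 1 through Verbose = 5):

// Hypothetical implementation: emit a message only when the
// configured verbosity is at or above the message's level.
func (c *Converter) writeLog(level LogLevel, msg string) {
	if c.LogLevel >= level {
		log.Println(msg)
	}
}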
