-
Notifications
You must be signed in to change notification settings - Fork 0
/
csv-to-mysql.py
96 lines (72 loc) · 2.13 KB
/
csv-to-mysql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
# Run with no args for usage instructions
#
# Notes:
# - will probably insert duplicate records if you load the same file twice
# - assumes that the number of fields in the header row is the same
# as the number of columns in the rest of the file and in the database
# - assumes the column order is the same in the file and in the database
#
# Speed: ~ 1s/MB
#
import sys
import MySQLdb
import csv
def main(user, db, table, csvfile):
try:
conn = getconn(user, db,)
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
sys.exit (1)
cursor = conn.cursor()
loadcsv(cursor, table, csvfile)
cursor.close()
conn.close()
def getconn(user, db, passwd=""):
conn = MySQLdb.connect(host = "localhost",
user = user,
passwd = "",
db = db)
return conn
def nullify(L):
"""Convert empty strings in the given list to None."""
# helper function
def f(x):
if(x == ""):
return None
else:
return x
return [f(x) for x in L]
def loadcsv(cursor, table, filename):
"""
Open a csv file and load it into a sql table.
Assumptions:
- the first line in the file is a header
"""
f = csv.reader(open(filename))
header = f.next()
numfields = len(header)
query = buildInsertCmd(table, numfields)
for line in f:
vals = nullify(line)
cursor.execute(query, vals)
return
def buildInsertCmd(table, numfields):
"""
Create a query string with the given table name and the right
number of format placeholders.
example:
>>> buildInsertCmd("foo", 3)
'insert into foo values (%s, %s, %s)'
"""
assert(numfields > 0)
placeholders = (numfields-1) * "%s, " + "%s"
query = ("insert into %s" % table) + (" values (%s)" % placeholders)
return query
if __name__ == '__main__':
# commandline execution
args = sys.argv[1:]
if(len(args) < 4):
print "error: arguments: user db table csvfile"
sys.exit(1)
main(*args)