Skip to content
This repository was archived by the owner on Aug 21, 2023. It is now read-only.

Commit 11d8d5d

Browse files
authored
fix the bug that output csv files are invalid when csv-delimiter is empty (#219)
* fix Makefile * fix csv bug * add lightning csv integration test * add separator test * simplify code * fix ci test * tmp * block some tests * fix bug * address comments * fix ut and make check * fix integration test * fix * fix * try to fix * try to fix * Revert "try to fix" This reverts commit ecdbeaa. * try to fix * address comments * address comments
1 parent c90415b commit 11d8d5d

17 files changed

+290
-48
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ PACKAGE_DIRECTORIES := $(PACKAGES) | sed 's/github.com\/pingcap\/dumpling\/*//'
33
DUMPLING_PKG := github.com/pingcap/dumpling
44
CHECKER := awk '{ print } END { if (NR > 0) { exit 1 } }'
55

6-
LDFLAGS += -X "github.com/pingcap/dumpling/v4/cli.ReleaseVersion=$(shell git describe --tags --dirty)"
6+
LDFLAGS += -X "github.com/pingcap/dumpling/v4/cli.ReleaseVersion=$(shell git describe --tags --dirty='-dev')"
77
LDFLAGS += -X "github.com/pingcap/dumpling/v4/cli.BuildTimestamp=$(shell date -u '+%Y-%m-%d %I:%M:%S')"
88
LDFLAGS += -X "github.com/pingcap/dumpling/v4/cli.GitHash=$(shell git rev-parse HEAD)"
99
LDFLAGS += -X "github.com/pingcap/dumpling/v4/cli.GitBranch=$(shell git rev-parse --abbrev-ref HEAD)"
@@ -24,7 +24,7 @@ bin/%: cmd/%/main.go $(wildcard v4/**/*.go)
2424
$(GOBUILD) $(RACEFLAG) -tags codes -o $@ $<
2525

2626
test: failpoint-enable
27-
$(GOTEST) $(RACEFLAG) -tags leak ./... || ( make failpoint-disable && exit 1 )
27+
$(GOTEST) $(RACEFLAG) -coverprofile=coverage.txt -covermode=atomic -tags leak ./... || ( make failpoint-disable && exit 1 )
2828
@make failpoint-disable
2929

3030
integration_test: failpoint-enable bin/dumpling

tests/basic/run.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ export DUMPLING_TEST_PORT=4000
3939
# Test for --sql option.
4040
run_sql "drop database if exists \`$DB_NAME\`;"
4141
run_sql "create database \`$DB_NAME\`;"
42-
run_sql "create sequence \`$DB_NAME\`.\`SEQUENCE_NAME\` increment by 1;"
42+
run_sql "create sequence \`$DB_NAME\`.\`$SEQUENCE_NAME\` increment by 1;"
4343

44-
run_dumpling --sql "select nextval(\`$DB_NAME\`.\`SEQUENCE_NAME\`)"
44+
run_dumpling --sql "select nextval(\`$DB_NAME\`.\`$SEQUENCE_NAME\`)"
4545

4646
actual=$(grep -w "(.*)[,|;]" ${DUMPLING_OUTPUT_DIR}/result.000000000.sql | cut -c2-2)
4747
echo "expected 1, actual ${actual}"
4848
[ "$actual" = 1 ]
4949

50-
run_dumpling --sql "select nextval(\`$DB_NAME\`.\`SEQUENCE_NAME\`)"
50+
run_dumpling --sql "select nextval(\`$DB_NAME\`.\`$SEQUENCE_NAME\`)"
5151

5252
actual=$(grep -w "(.*)[,|;]" ${DUMPLING_OUTPUT_DIR}/result.000000000.sql | cut -c2-2)
5353
echo "expected 2, actual ${actual}"

tests/e2e_csv/conf/diff_config.toml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# diff Configuration.
2+
3+
log-level = "info"
4+
5+
chunk-size = 1000
6+
7+
check-thread-count = 4
8+
9+
sample-percent = 100
10+
11+
use-rowid = false
12+
13+
use-checksum = true
14+
15+
fix-sql-file = "fix.sql"
16+
17+
# tables that need to be checked.
18+
[[check-tables]]
19+
schema = "e2e_csv"
20+
tables = ["escape", "t"]
21+
22+
[[table-config]]
23+
schema = "e2e_csv"
24+
table = "t"
25+
26+
[[table-config.source-tables]]
27+
instance-id = "source-1"
28+
schema = "e2e_csv"
29+
table = "t"
30+
31+
[[table-config]]
32+
schema = "e2e_csv"
33+
table = "escape"
34+
35+
[[table-config.source-tables]]
36+
instance-id = "source-1"
37+
schema = "e2e_csv"
38+
table = "escape"
39+
40+
[[source-db]]
41+
host = "127.0.0.1"
42+
port = 3306
43+
user = "root"
44+
password = ""
45+
instance-id = "source-1"
46+
47+
[target-db]
48+
host = "127.0.0.1"
49+
port = 4000
50+
user = "root"
51+
password = ""

tests/e2e_csv/conf/lightning.toml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
### tidb-lightning config
2+
3+
[lightning]
4+
server-mode = false
5+
level = "error"
6+
check-requirements = false
7+
8+
[tikv-importer]
9+
backend = "tidb"
10+
on-duplicate = "error"
11+
12+
[mydumper]
13+
data-source-dir = "/tmp/dumpling_test_result/sql_res.e2e_csv"
14+
15+
[mydumper.csv]
16+
separator = 'separator-place-holder'
17+
delimiter = "delimiter-place-holder"
18+
header = true
19+
not-null = false
20+
null = '\N'
21+
backslash-escape = backslash-escape-place-holder
22+
trim-last-separator = false
23+
24+
[tidb]
25+
host = "127.0.0.1"
26+
port = 4000
27+
user = "root"
28+
password = ""
29+
status-port = 10080
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
CREATE DATABASE `e2e_csv` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_bin */ /*!80016 DEFAULT ENCRYPTION='N' */;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../naughty_strings/data/naughty_strings.escape-schema.sql

tests/e2e_csv/data/e2e_csv.escape.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../naughty_strings/data/naughty_strings.escape.sql
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../naughty_strings/data/naughty_strings.t-schema.sql

tests/e2e_csv/data/e2e_csv.t.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../naughty_strings/data/naughty_strings.t.sql

tests/e2e_csv/run.sh

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/bin/bash
2+
#
3+
# Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.
4+
5+
set -eu
6+
cur=$(cd `dirname $0`; pwd)
7+
8+
DB_NAME="e2e_csv"
9+
10+
# drop database on mysql
11+
export DUMPLING_TEST_PORT=3306
12+
run_sql "drop database if exists $DB_NAME;"
13+
14+
run_sql_file "$DUMPLING_BASE_NAME/data/e2e_csv-schema-create.sql"
15+
export DUMPLING_TEST_DATABASE="e2e_csv"
16+
run_sql_file "$DUMPLING_BASE_NAME/data/e2e_csv.escape-schema.sql"
17+
run_sql_file "$DUMPLING_BASE_NAME/data/e2e_csv.escape.sql"
18+
run_sql_file "$DUMPLING_BASE_NAME/data/e2e_csv.t-schema.sql"
19+
20+
mkdir -p $DUMPLING_TEST_DIR/data
21+
# lightning currently omits empty lines that have no delimiters, so skip these cases
22+
sed "s/('')/-- ('')/g" "$DUMPLING_BASE_NAME/data/e2e_csv.t.sql" | sed "s/(' ')/-- (' ')/g" > $DUMPLING_TEST_DIR/data/e2e_csv.t.sql
23+
run_sql_file "$DUMPLING_TEST_DIR/data/e2e_csv.t.sql"
24+
25+
run() {
26+
echo "*** running subtest case ***"
27+
echo "escape_backslash is $escape_backslash"
28+
echo "csv_delimiter is $csv_delimiter"
29+
echo "csv_separator is $csv_separator"
30+
31+
# drop database on tidb
32+
export DUMPLING_TEST_PORT=4000
33+
export DUMPLING_TEST_DATABASE=""
34+
run_sql "drop database if exists $DB_NAME;"
35+
36+
# dumping
37+
export DUMPLING_TEST_PORT=3306
38+
export DUMPLING_TEST_DATABASE=$DB_NAME
39+
run_dumpling --filetype="csv" --escape-backslash=$escape_backslash --csv-delimiter="$csv_delimiter" --csv-separator="$csv_separator"
40+
41+
# construct lightning configuration
42+
mkdir -p $DUMPLING_TEST_DIR/conf
43+
cp "$cur/conf/lightning.toml" $DUMPLING_TEST_DIR/conf
44+
45+
sed -i -e "s/separator-place-holder/$csv_separator/g" $DUMPLING_TEST_DIR/conf/lightning.toml
46+
csv_delimiter_holder=$csv_delimiter
47+
if [ "$csv_delimiter" = '"' ]; then
48+
# We want to replace delimiter-place-holder in lightning.toml with \",
49+
# but sed treats \" in the replacement text as a plain ", so we need to use \\\" here.
50+
csv_delimiter_holder='\\\"'
51+
fi
52+
sed -i -e "s/delimiter-place-holder/$csv_delimiter_holder/g" $DUMPLING_TEST_DIR/conf/lightning.toml
53+
escape_backslash_holder="true"
54+
if [ "$escape_backslash" = "false" ] && [ "$csv_delimiter" != "" ]; then
55+
escape_backslash_holder="false"
56+
fi
57+
sed -i -e "s/backslash-escape-place-holder/$escape_backslash_holder/g" $DUMPLING_TEST_DIR/conf/lightning.toml
58+
59+
cat "$DUMPLING_TEST_DIR/conf/lightning.toml"
60+
# use lightning to import the dumped data into tidb
61+
run_lightning $DUMPLING_TEST_DIR/conf/lightning.toml
62+
63+
# check mysql and tidb data
64+
check_sync_diff $cur/conf/diff_config.toml
65+
}
66+
67+
escape_backslash_arr="true false"
68+
csv_delimiter_arr="\" '"
69+
csv_separator_arr=', a aa |*|'
70+
71+
for escape_backslash in $escape_backslash_arr
72+
do
73+
for csv_separator in $csv_separator_arr
74+
do
75+
for csv_delimiter in $csv_delimiter_arr
76+
do
77+
run
78+
done
79+
if [ "$escape_backslash" = "true" ]; then
80+
csv_delimiter=""
81+
run
82+
fi
83+
done
84+
done

0 commit comments

Comments
 (0)