Skip to content

Commit cb716bc

Browse files
authored
Perfloader updates (#136)
* perfdataloader updates
1 parent 0395bd3 commit cb716bc

File tree

9 files changed

+173
-41
lines changed

9 files changed

+173
-41
lines changed

Diff for: PERF-DATALOADER.md

+21-16
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,46 @@
11
# Perf Dataloader
22

3-
### used to infer the table schema from the database and generate a benchbase interpreted input yaml file which can be used to load sample data into the table.
3+
### Used to infer the table schema from the database and generate a benchbase interpreted input yaml file which can be used to load sample data into the table.
4+
This tool is integrated into benchbase so that users don't have to install additional tools to use it. All existing functionality from yugabyte/benchbase should continue to work as is.
45

6+
### Build steps:
7+
#### Pre-requisites
8+
- java version 17 or higher is installed
9+
- maven version 3.6 or higher is installed
510

6-
build command:
11+
#### build steps
12+
The build steps will also validate if the pre-requisites are met.
713
```
814
cd benchbase
915
./build.sh
1016
```
1117

12-
how to use:
18+
### How to use:
1319
```
14-
#$./perfloader --help
15-
Usage: ./perfloader --config <config_file> --table-name <table_name> --rows <rows> [--generate-only] [--load-only]
20+
#$./perf-data-loader --help
21+
Usage: ./perf-data-loader --config <config_file> --table-name <table_name> --rows <rows> [--gen-config-only] [--load-only]
1622
Short forms: -c <config_file> -t <table_name> -r <rows>
1723
Options:
18-
-c, --config Configuration file
19-
-t, --table-name Table name
20-
-r, --rows Number of rows
21-
--generate-only Only generate the loader file
22-
--load-only Only load data into the database
23-
-h, --help Display this help message
24-
24+
-c, --config Configuration file
25+
-t, --table-name Table name
26+
-r, --rows Number of rows
27+
--gen-config-only Only generate the loader/config file
28+
--load-only Only load data into the database
29+
-h, --help Display this help message
2530
```
2631
- to only generate the loader file (skipping the actual load). This generates the YAML file `<table-name>_loader.yaml`, which can then be used to load the data.
2732
```
28-
./perfloader --config <config_file> --table-name <table_name> --rows <rows> --generate-only
33+
./perf-data-loader --config <config_file> --table-name <table_name> --rows <rows> --gen-config-only
2934
```
3035

3136
- to only load the data (when your loader file has already been generated)
3237
```
33-
./perfloader --config <config_file> --load-only
38+
./perf-data-loader --config <config_file> --load-only
3439
```
3540

3641
- to generate the loader yaml file and load the data in one go
3742
```
38-
./perfloader --config <config_file> --table-name <table_name> --rows <rows>
43+
./perf-data-loader --config <config_file> --table-name <table_name> --rows <rows>
3944
```
4045

4146
The input YAML file should have the following content:
@@ -55,4 +60,4 @@ rows: {{rows}}
5560
- partitioned tables are not yet supported.
5661
- columns with user defined data types are not yet supported.
5762

58-
#### [Reference utility functions](https://github.com/yugabyte/benchbase/blob/main/src/main/java/com/oltpbenchmark/benchmarks/featurebench/Readme.md#utility-functions-)
63+
#### [Reference utility functions](https://github.com/yugabyte/benchbase/blob/main/src/main/java/com/oltpbenchmark/benchmarks/featurebench/Readme.md#utility-functions-)

Diff for: build.sh

+59
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,64 @@
11
#!/bin/sh
22

3+
# Function to check Java version
4+
# Verify that a Java runtime of major version 17 or newer is available.
# Globals:  JAVA_VERSION        (written) version string reported by `java -version`
#           JAVA_MAJOR_VERSION  (written) leading numeric component of that string
# Outputs:  a one-line status message on stdout
# Exits:    with status 1 when Java is missing or older than 17
check_java() {
  if ! command -v java > /dev/null; then
    echo "Java is not installed. Please install Java 17 or higher."
    exit 1
  fi

  # Select the line that actually carries the version rather than assuming it
  # is the first one: the JVM prints a "Picked up JAVA_TOOL_OPTIONS: ..." banner
  # ahead of it when that variable is set. The field separator is quoted so the
  # shell cannot glob-expand it against a file named "_" in the current directory.
  JAVA_VERSION=$(java -version 2>&1 | awk -F'["_]' '/version "/ { print $2; exit }')

  # Keep only the leading digits so "17", "17.0.2" and "21-ea" all compare as numbers.
  JAVA_MAJOR_VERSION=${JAVA_VERSION%%[!0-9]*}

  # An unparseable version is treated as 0, i.e. "too old", instead of making
  # the numeric test itself fail with a syntax error.
  if [ "${JAVA_MAJOR_VERSION:-0}" -ge 17 ]; then
    echo "Java version $JAVA_VERSION is installed."
  else
    echo "Java 17 or higher is required. Please install the required version of Java."
    exit 1
  fi
}
19+
20+
# Function to check Maven version
21+
# Verify that Apache Maven 3.6 or newer is available.
# Globals:  MAVEN_VERSION        (written) version string reported by `mvn -version`
#           MAVEN_MAJOR_VERSION  (written) numeric major component
#           MAVEN_MINOR_VERSION  (written) numeric minor component
# Outputs:  a one-line status message on stdout
# Exits:    with status 1 when Maven is missing or older than 3.6
check_maven() {
  if ! command -v mvn > /dev/null; then
    echo "Maven is not installed. Please install Maven 3.6 or higher."
    exit 1
  fi

  MAVEN_VERSION=$(mvn -version 2>&1 | awk '/Apache Maven/ { print $3; exit }')
  MAVEN_MAJOR_VERSION=$(printf '%s\n' "$MAVEN_VERSION" | awk -F. '{ print $1 }')
  MAVEN_MINOR_VERSION=$(printf '%s\n' "$MAVEN_VERSION" | awk -F. '{ print $2 }')

  # Reduce each component to its leading digits (e.g. "0-beta" -> "0") and
  # default to 0 so the numeric comparisons below never see a non-number.
  MAVEN_MAJOR_VERSION=${MAVEN_MAJOR_VERSION%%[!0-9]*}
  MAVEN_MINOR_VERSION=${MAVEN_MINOR_VERSION%%[!0-9]*}
  MAVEN_MAJOR_VERSION=${MAVEN_MAJOR_VERSION:-0}
  MAVEN_MINOR_VERSION=${MAVEN_MINOR_VERSION:-0}

  # The minor version only matters within the 3.x line; any later major
  # release (4.0, 4.1, ...) satisfies "3.6 or higher" regardless of its minor.
  if [ "$MAVEN_MAJOR_VERSION" -gt 3 ] ||
    { [ "$MAVEN_MAJOR_VERSION" -eq 3 ] && [ "$MAVEN_MINOR_VERSION" -ge 6 ]; }; then
    echo "Maven version $MAVEN_VERSION is installed."
  else
    echo "Maven 3.6 or higher is required. Please install the required version of Maven."
    exit 1
  fi
}
37+
38+
# Check for OS and run respective checks
39+
# Map the kernel name reported by uname onto a banner for the supported
# platforms; anything else is rejected before any tool check runs.
kernel_name=$(uname -s)
case "$kernel_name" in
  Linux*)
    os_banner="Running on Linux"
    ;;
  Darwin*)
    os_banner="Running on macOS"
    ;;
  CYGWIN* | MINGW* | MSYS*)
    os_banner="Running on Windows"
    ;;
  *)
    echo "Unsupported OS"
    exit 1
    ;;
esac

# Every supported platform runs the same pre-requisite checks.
echo "$os_banner"
check_java
check_maven
60+
61+
# Continue with the build process
362
# Stop at the first failing step: without these guards a failed Maven build
# would fall through to `cd`/`tar`, which could unpack a stale archive from a
# previous build or run tar in the wrong directory.
./mvnw clean package -P yugabyte -DskipTests || exit 1
cd target || exit 1
tar -xzf benchbase-yugabyte.tgz

Diff for: perfloader renamed to perf-data-loader

+43-15
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,46 @@ TABLE_NAME=""
66
ROWS=""
77
GENERATE_ONLY=false
88
LOAD_ONLY=false
9+
JAR_PATH=""
910

1011
# Function to display help
1112
function display_help {
12-
echo "Usage: $0 --config <config_file> --table-name <table_name> --rows <rows> [--generate-only] [--load-only]"
13+
echo "Usage: $0 --config <config_file> --table-name <table_name> --rows <rows> [--gen-config-only] [--load-only]"
1314
echo "Short forms: -c <config_file> -t <table_name> -r <rows>"
1415
echo "Options:"
15-
echo " -c, --config Configuration file"
16-
echo " -t, --table-name Table name"
17-
echo " -r, --rows Number of rows"
18-
echo " --generate-only Only generate the loader file"
19-
echo " --load-only Only load data into the database"
20-
echo " -h, --help Display this help message"
16+
echo " -c, --config Configuration file"
17+
echo " -t, --table-name Table name"
18+
echo " -r, --rows Number of rows"
19+
echo " --gen-config-only Only generate the loader/config file"
20+
echo " --load-only Only load data into the database"
21+
echo " -h, --help Display this help message"
2122
exit 0
2223
}
2324

25+
# Function to find the jar file
26+
# Locate benchbase.jar relative to the current working directory.
# Candidates are probed in priority order: the current directory first, then
# the yugabyte build output, then the postgres build output.
# Globals:  JAR_PATH (written) - set to the first candidate that exists
# Outputs:  an error message on stdout when no candidate exists
# Exits:    with status 1 when no candidate exists
function find_jar {
  local candidate
  for candidate in \
    "benchbase.jar" \
    "target/benchbase-yugabyte/benchbase.jar" \
    "target/benchbase-postgres/benchbase.jar"; do
    if [[ -f "$candidate" ]]; then
      JAR_PATH="$candidate"
      return
    fi
  done

  echo "Error: benchbase.jar not found in any expected location."
  exit 1
}
48+
2449
# Parse arguments
2550
while [[ $# -gt 0 ]]; do
2651
case $1 in
@@ -39,7 +64,7 @@ while [[ $# -gt 0 ]]; do
3964
shift # past argument
4065
shift # past value
4166
;;
42-
--generate-only)
67+
--gen-config-only)
4368
GENERATE_ONLY=true
4469
shift # past argument
4570
;;
@@ -57,29 +82,32 @@ while [[ $# -gt 0 ]]; do
5782
esac
5883
done
5984

60-
# Ensure that both --generate-only and --load-only are not provided simultaneously
85+
# Ensure that both --gen-config-only and --load-only are not provided simultaneously
6186
if [ "$GENERATE_ONLY" = true ] && [ "$LOAD_ONLY" = true ]; then
62-
echo "Error: Cannot use --generate-only and --load-only simultaneously."
87+
echo "Error: Cannot use --gen-config-only and --load-only simultaneously."
6388
exit 1
6489
fi
6590

91+
# Find the JAR file
92+
find_jar
93+
6694
if [ "$LOAD_ONLY" = true ]; then
6795
if [ -z "$CONFIG" ]; then
6896
echo "Error: --config parameter is required with --load-only."
6997
display_help
7098
fi
7199
echo "Loading the data into the database now!"
72-
java -jar target/benchbase-yugabyte/benchbase.jar -b featurebench -c "$CONFIG" --load=True
100+
java -jar "$JAR_PATH" -b featurebench -c "$CONFIG" --load=True
73101
exit 0
74102
fi
75103

76104
if [ "$GENERATE_ONLY" = true ]; then
77105
if [ -z "$CONFIG" ] || [ -z "$TABLE_NAME" ] || [ -z "$ROWS" ]; then
78-
echo "Error: --config, --table-name, and --rows parameters are required with --generate-only."
106+
echo "Error: --config, --table-name, and --rows parameters are required with --gen-config-only."
79107
display_help
80108
fi
81109
echo "Generating loader file for the table"
82-
java -jar target/benchbase-yugabyte/benchbase.jar -b perf-dataloader -c "$CONFIG" -p tableName="$TABLE_NAME" -p rows="$ROWS" --load=True
110+
java -jar "$JAR_PATH" -b perf-dataloader -c "$CONFIG" -p tableName="$TABLE_NAME" -p rows="$ROWS" --load=True
83111
exit 0
84112
fi
85113

@@ -91,9 +119,9 @@ fi
91119

92120
# If no specific option is provided, do both generate and load with default config file location
93121
echo "Generating loader file for the table"
94-
java -jar target/benchbase-yugabyte/benchbase.jar -b perf-dataloader -c "$CONFIG" -p tableName="$TABLE_NAME" -p rows="$ROWS" --load=True
122+
java -jar "$JAR_PATH" -b perf-dataloader -c "$CONFIG" -p tableName="$TABLE_NAME" -p rows="$ROWS" --load=True
95123

96124
DEFAULT_CONFIG="${TABLE_NAME}_loader.yaml"
97125

98126
echo "Loading the data into the database now!"
99-
java -jar target/benchbase-yugabyte/benchbase.jar -b featurebench -c "$DEFAULT_CONFIG" --load=True
127+
java -jar "$JAR_PATH" -b featurebench -c "$DEFAULT_CONFIG" --load=True

Diff for: src/main/assembly/tgz.xml

+7-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
<fileSet>
1212
<directory>${project.basedir}/config</directory>
1313
</fileSet>
14-
<fileSet>
15-
<directory>${project.basedir}/data</directory>
16-
</fileSet>
14+
<!-- <fileSet>-->
15+
<!-- <directory>${project.basedir}/data</directory>-->
16+
<!-- </fileSet>-->
1717
<fileSet>
1818
<directory>${project.basedir}/scripts</directory>
1919
</fileSet>
@@ -36,6 +36,10 @@
3636
<source>${project.basedir}/CONTRIBUTORS.md</source>
3737
<outputDirectory></outputDirectory>
3838
</file>
39+
<file>
40+
<source>${project.basedir}/perf-data-loader</source>
41+
<outputDirectory></outputDirectory>
42+
</file>
3943
</files>
4044
<dependencySets>
4145
<dependencySet>

Diff for: src/main/assembly/zip.xml

+7-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
<fileSet>
1212
<directory>${project.basedir}/config</directory>
1313
</fileSet>
14-
<fileSet>
15-
<directory>${project.basedir}/data</directory>
16-
</fileSet>
14+
<!-- <fileSet>-->
15+
<!-- <directory>${project.basedir}/data</directory>-->
16+
<!-- </fileSet>-->
1717
<fileSet>
1818
<directory>${project.basedir}/scripts</directory>
1919
</fileSet>
@@ -36,6 +36,10 @@
3636
<source>${project.basedir}/CONTRIBUTORS.md</source>
3737
<outputDirectory></outputDirectory>
3838
</file>
39+
<file>
40+
<source>${project.basedir}/perf-data-loader</source>
41+
<outputDirectory></outputDirectory>
42+
</file>
3943
</files>
4044
<dependencySets>
4145
<dependencySet>

Diff for: src/main/java/com/oltpbenchmark/benchmarks/dataloader/DataGeneratorLoader.java

+30-1
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ public List<LoaderThread> createLoaderThreads() throws SQLException {
123123
String tableName = workConf.getXmlConfig().getString("tablename");
124124
int rows = workConf.getXmlConfig().getInt("rows");
125125

126+
// check if the table exists in the database
127+
checkIfTableExists(tableName, conn);
126128
// get the table schema
127129
List<Column> tableSchema = getTableSchema(tableName, conn);
128130

@@ -182,6 +184,28 @@ public List<LoaderThread> createLoaderThreads() throws SQLException {
182184
LOG.info("Generated loader file: {}_loader.yaml", tableName);
183185
return new ArrayList<>();
184186
}
187+
188+
public static void checkIfTableExists(String tableName, Connection connection) throws SQLException {
189+
boolean exists = false;
190+
ResultSet resultSet = null;
191+
192+
try {
193+
DatabaseMetaData metaData = connection.getMetaData();
194+
resultSet = metaData.getTables(null, null, tableName, new String[]{"TABLE"});
195+
if (resultSet.next()) {
196+
exists = true;
197+
}
198+
if (!exists) {
199+
throw new RuntimeException(String.format("Table with name %s does not exist", tableName));
200+
}
201+
} finally {
202+
if (resultSet != null) {
203+
resultSet.close();
204+
}
205+
}
206+
207+
}
208+
185209
public static List<Column> getTableSchema(String tableName, Connection conn) {
186210
List<Column> tableSchemaList = new ArrayList<>();
187211
String query = "SELECT column_name, data_type, character_maximum_length, is_identity " +
@@ -431,6 +455,11 @@ public Map<String, PropertyMapping> utilsMapping(List<Column> tableSchema, List<
431455
else
432456
pm = properties.get(col.getDataType().toLowerCase());
433457

458+
if (pm == null) {
459+
throw new RuntimeException(String.format("Cannot find suitable utility function for column " +
460+
"`%s` of datatype `%s`. Consider asking #perf team to add a utility function for given " +
461+
"data type", col.getColumnName(), col.getDataType()));
462+
}
434463
for (int i = 0; i < pm.params.size(); i++) {
435464
Object obj = pm.params.get(i);
436465
if (obj instanceof String) {
@@ -512,7 +541,7 @@ public void writeToFile(String tableName, int rows, Root root) {
512541
combinedData.putAll(newData);
513542

514543
// Write combined data to the new output file
515-
FileWriter fileWriter = new FileWriter(String.format("%s_loader.yaml", tableName));
544+
FileWriter fileWriter = new FileWriter(String.format("%s/%s_loader.yaml", System.getProperty("user.dir"), tableName));
516545
yaml.dump(combinedData, fileWriter);
517546
fileWriter.close();
518547
} catch (IOException e) {

Diff for: src/main/resources/benchmarks/perf-dataloader/datatype-mapping.properties

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
# minimum,maximum
44
integer=RandomInt:2:1,rows
5-
5+
numeric=RandomNumber:2:1,rows
6+
bigint=RandomNumber:2:1,rows
67
#
78
boolean=RandomBoolean:0:
89

@@ -24,8 +25,6 @@ text=RandomAString:2:1,1000
2425
#
2526
uuid=RandomUUID:0:
2627

27-
# minimum,maximum
28-
numeric=RandomNumber:2:1,rows
2928

3029
# arraySize, minLength,maxLength
3130
array=RandomTextArrayGen:3:1,20,20

Diff for: src/main/resources/benchmarks/perf-dataloader/fk-mapping.properties

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
integer=OneNumberFromArray:int
2+
bigint=OneNumberFromArray:int
3+
smallint=OneNumberFromArray:int
24
numeric=OneNumberFromArray:int
35

46
date=OneStringFromArray:string

Diff for: src/main/resources/benchmarks/perf-dataloader/pk-mapping.properties

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
integer=PrimaryIntGen:2:1,rows
33
# startNumber,endNumber,desiredLength
44
string=RandomPKString:3:1,rows,1000
5+
text=RandomPKString:3:1,rows,1000
56

67
# startNumber,length
78
string1=HashedPrimaryStringGen:2:1,5
@@ -23,6 +24,7 @@ character=RandomAString:2:1,max
2324
timestampwithtimezone=RandomTimestampWithTimeZone:1:rows
2425
timestampwithouttimezone=RandomTimestampWithoutTimeZone:1:rows
2526
smallint=RandomInt:2:1,rows
27+
bigint=RandomInt:2:1,rows
2628
tsvector=RandomAString:2:1,rows
2729

2830

0 commit comments

Comments
 (0)