Skip to content

Commit df68a6d

Browse files
authored
Perf-Dataloader: per table data loader for yugabytedb (#130)
* perf dataloader
1 parent 39d89eb commit df68a6d

27 files changed

+1493
-9
lines changed
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<component name="ProjectRunConfigurationManager">
2+
<configuration default="false" name="perf-dataloader - yugabyte" type="Application" factoryName="Application">
3+
<option name="MAIN_CLASS_NAME" value="com.oltpbenchmark.DBWorkload" />
4+
<module name="benchbase" />
5+
<option name="PROGRAM_PARAMETERS" value="-b perf-dataloader -c config/yugabyte/sample_dataloader_config.yaml --load=true -p tableName=actor -p rows=100" />
6+
<extension name="coverage">
7+
<pattern>
8+
<option name="PATTERN" value="com.oltpbenchmark.*" />
9+
<option name="ENABLED" value="true" />
10+
</pattern>
11+
</extension>
12+
<extension name="software.aws.toolkits.jetbrains.core.execution.JavaAwsConnectionExtension">
13+
<option name="credential" />
14+
<option name="region" />
15+
<option name="useCurrentConnection" value="false" />
16+
</extension>
17+
<method v="2">
18+
<option name="Make" enabled="true" />
19+
</method>
20+
</configuration>
21+
</component>

PERF-DATALOADER.md

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Perf Dataloader
2+
3+
### Infers a table's schema from the database and generates a BenchBase input YAML file that can be used to load sample data into that table.
4+
5+
6+
build command:
7+
```
8+
cd benchbase
9+
./build.sh
10+
```
11+
12+
how to use:
13+
```
14+
#$./perfloader --help
15+
Usage: ./perfloader --config <config_file> --table-name <table_name> --rows <rows> [--generate-only] [--load-only]
16+
Short forms: -c <config_file> -t <table_name> -r <rows>
17+
Options:
18+
-c, --config Configuration file
19+
-t, --table-name Table name
20+
-r, --rows Number of rows
21+
--generate-only Only generate the loader file
22+
--load-only Only load data into the database
23+
-h, --help Display this help message
24+
25+
```
26+
- To only generate the loader file (skipping the actual load). This generates the YAML file `<table-name>_loader.yaml`, which can then be used to load the data.
27+
```
28+
./perfloader --config <config_file> --table-name <table_name> --rows <rows> --generate-only
29+
```
30+
31+
- To only load the data (when your loader file has already been generated). Pass the generated loader file, e.g. `<table-name>_loader.yaml`, as `<config_file>`.
32+
```
33+
./perfloader --config <config_file> --load-only
34+
```
35+
36+
- to generate the loader yaml file and load the data in one go
37+
```
38+
./perfloader --config <config_file> --table-name <table_name> --rows <rows>
39+
```
40+
41+
The input YAML file should have the following content:
42+
```
43+
type: YUGABYTE
44+
driver: com.yugabyte.Driver
45+
url: jdbc:yugabytedb://localhost:5433/yugabyte?sslmode=require&reWriteBatchedInserts=true
46+
username: yugabyte
47+
password: password
48+
49+
tablename: {{tableName}}
50+
rows: {{rows}}
51+
52+
```
53+
54+
#### Caveat/In-progress items
55+
- partitioned tables are not yet supported.
56+
- columns with user defined data types are not yet supported.

build.sh

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/sh
#
# Build the "yugabyte" Maven profile (tests skipped) and unpack the resulting
# distribution archive under target/.
#
# Intended to be executed (./build.sh), not sourced. Every step is checked so
# that a failed build never goes on to unpack a stale or missing archive, and
# the script's exit status reflects the first failure.

./mvnw clean package -P yugabyte -DskipTests || {
  echo "Error: maven build failed." >&2
  exit 1
}

cd target || {
  echo "Error: target directory not found after the build." >&2
  exit 1
}

tar -xzf benchbase-yugabyte.tgz || {
  echo "Error: could not extract benchbase-yugabyte.tgz." >&2
  exit 1
}

# Kept from the original script: confirms the unpacked directory exists.
cd benchbase-yugabyte || {
  echo "Error: benchbase-yugabyte directory missing after extraction." >&2
  exit 1
}

config/plugin.xml

+1
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@
1818
<plugin name="smallbank">com.oltpbenchmark.benchmarks.smallbank.SmallBankBenchmark</plugin>
1919
<plugin name="hyadapt">com.oltpbenchmark.benchmarks.hyadapt.HYADAPTBenchmark</plugin>
2020
<plugin name="otmetrics">com.oltpbenchmark.benchmarks.otmetrics.OTMetricsBenchmark</plugin>
21+
<plugin name="perf-dataloader">com.oltpbenchmark.benchmarks.dataloader.DataGenerator</plugin>
2122
</plugins>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
type: YUGABYTE
2+
driver: com.yugabyte.Driver
3+
url: jdbc:yugabytedb://172.151.25.127:5433/yugabyte?sslmode=require&reWriteBatchedInserts=true
4+
username: yugabyte
5+
password: Password@321
6+
7+
tablename: {{tableName}}
8+
rows: {{rows}}

perfloader

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#!/bin/bash
2+
3+
# Command-line state. The three value options start out empty and the two
# mode switches start out disabled; the argument parser overwrites them.
CONFIG=
TABLE_NAME=
ROWS=
GENERATE_ONLY="false"
LOAD_ONLY="false"
9+
10+
#######################################
# Print the usage text and terminate the calling shell.
# Arguments:
#   $1 - optional exit status (default 0). Anything that is not a plain
#        non-negative integer is treated as 1.
# Outputs:
#   The help text, on stdout when the status is 0 and on stderr otherwise,
#   so that error output does not pollute captured stdout.
# Returns:
#   Never returns; exits with the requested status.
#######################################
function display_help {
  local status="${1:-0}"
  case "$status" in
    ''|*[!0-9]*) status=1 ;;
  esac

  # Pick the stream once; all lines below are written to it.
  local out_fd=1
  if [[ "$status" -ne 0 ]]; then
    out_fd=2
  fi

  {
    echo "Usage: $0 --config <config_file> --table-name <table_name> --rows <rows> [--generate-only] [--load-only]"
    echo "Short forms: -c <config_file> -t <table_name> -r <rows>"
    echo "Options:"
    echo " -c, --config Configuration file"
    echo " -t, --table-name Table name"
    echo " -r, --rows Number of rows"
    echo " --generate-only Only generate the loader file"
    echo " --load-only Only load data into the database"
    echo " -h, --help Display this help message"
  } >&"$out_fd"

  exit "$status"
}
23+
24+
# Parse arguments, validate the selected mode, then run the generate and/or
# load step. Every failure path exits non-zero, and the exit status of the
# java process is propagated instead of being replaced by a hard-coded 0.

JAR_PATH="target/benchbase-yugabyte/benchbase.jar"

# Report a usage error: the message and the help text go to stderr and the
# script exits with status 1. display_help terminates the shell that calls
# it, so it is run in a subshell here to keep control of the exit status.
usage_error() {
  echo "$1" >&2
  ( display_help ) >&2
  exit 1
}

# Run the perf-dataloader benchmark for the requested table and row count.
generate_loader_file() {
  echo "Generating loader file for the table"
  java -jar "$JAR_PATH" -b perf-dataloader -c "$CONFIG" \
    -p tableName="$TABLE_NAME" -p rows="$ROWS" --load=True
}

# Run the featurebench load phase with the config file given as $1.
load_data() {
  echo "Loading the data into the database now!"
  java -jar "$JAR_PATH" -b featurebench -c "$1" --load=True
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    -c|--config)
      [[ $# -ge 2 ]] || usage_error "Error: $1 requires a value."
      CONFIG="$2"
      shift 2 # past argument and value
      ;;
    -t|--table-name)
      [[ $# -ge 2 ]] || usage_error "Error: $1 requires a value."
      TABLE_NAME="$2"
      shift 2 # past argument and value
      ;;
    -r|--rows)
      [[ $# -ge 2 ]] || usage_error "Error: $1 requires a value."
      ROWS="$2"
      shift 2 # past argument and value
      ;;
    --generate-only)
      GENERATE_ONLY=true
      shift # past argument
      ;;
    --load-only)
      LOAD_ONLY=true
      shift # past argument
      ;;
    -h|--help)
      display_help
      ;;
    *)
      usage_error "Invalid option: $1"
      ;;
  esac
done

# Ensure that both --generate-only and --load-only are not provided simultaneously
if [[ "$GENERATE_ONLY" == true && "$LOAD_ONLY" == true ]]; then
  echo "Error: Cannot use --generate-only and --load-only simultaneously." >&2
  exit 1
fi

if [[ "$LOAD_ONLY" == true ]]; then
  if [[ -z "$CONFIG" ]]; then
    usage_error "Error: --config parameter is required with --load-only."
  fi
  load_data "$CONFIG"
  exit $?
fi

if [[ "$GENERATE_ONLY" == true ]]; then
  if [[ -z "$CONFIG" || -z "$TABLE_NAME" || -z "$ROWS" ]]; then
    usage_error "Error: --config, --table-name, and --rows parameters are required with --generate-only."
  fi
  generate_loader_file
  exit $?
fi

# Check if required parameters are provided for the default operation
if [[ -z "$CONFIG" || -z "$TABLE_NAME" || -z "$ROWS" ]]; then
  usage_error "Error: Missing required parameters."
fi

# If no specific option is provided, do both generate and load with default
# config file location. The load is skipped when generation fails, so a stale
# or missing loader file is never loaded.
if ! generate_loader_file; then
  echo "Error: loader file generation failed; skipping the load step." >&2
  exit 1
fi

DEFAULT_CONFIG="${TABLE_NAME}_loader.yaml"
if [[ ! -f "$DEFAULT_CONFIG" ]]; then
  echo "Error: expected loader file '$DEFAULT_CONFIG' was not generated." >&2
  exit 1
fi

load_data "$DEFAULT_CONFIG"
exit $?

src/main/java/com/oltpbenchmark/DBWorkload.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ public static void main(String[] args) throws Exception {
120120
int lastTxnId = 0;
121121
for (String plugin : targetList) {
122122
String pluginTest = "[@bench='" + plugin + "']";
123-
if (plugin.equalsIgnoreCase("featurebench"))
123+
if (plugin.equalsIgnoreCase("featurebench") || plugin.equalsIgnoreCase("perf-dataloader"))
124124
{
125125
String[] params=null;
126126
if (argsLine.hasOption("params")) {
@@ -140,6 +140,7 @@ public static void main(String[] args) throws Exception {
140140
WorkloadConfiguration wrkld = new WorkloadConfiguration();
141141
wrkld.setBenchmarkName(plugin);
142142
wrkld.setXmlConfig(xmlConfig);
143+
wrkld.setConfigFilePath(configFile);
143144

144145
// Pull in database configuration
145146
wrkld.setDatabaseType(DatabaseType.get(xmlConfig.getString("type")));
@@ -410,7 +411,7 @@ public static void main(String[] args) throws Exception {
410411

411412
if (targetList.length > 1 || work.containsKey("weights[@bench]")) {
412413
weight_strings = Arrays.asList(work.getString("weights" + pluginTest).split("\\s*,\\s*"));
413-
} else if (plugin.equalsIgnoreCase("featurebench")) {
414+
} else if (plugin.equalsIgnoreCase("featurebench") || plugin.equalsIgnoreCase("perf-dataloader")) {
414415
weight_strings = List.of();
415416
time = work.getInt("/time_secs", 0);
416417
// get workload specific time in secs

src/main/java/com/oltpbenchmark/WorkloadConfiguration.java

+9
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ public class WorkloadConfiguration {
4949
private int isolationMode = Connection.TRANSACTION_SERIALIZABLE;
5050
private String dataDir = null;
5151
private String ddlPath = null;
52+
private String configFilePath = null;
5253

5354
/**
5455
* If true, establish a new connection for each transaction, otherwise use one persistent connection per client
@@ -124,6 +125,14 @@ public void setMaxRetries(int maxRetries) {
124125
this.maxRetries = maxRetries;
125126
}
126127

128+
public String getConfigFilePath() {
129+
return configFilePath;
130+
}
131+
132+
public void setConfigFilePath(String configFilePath) {
133+
this.configFilePath = configFilePath;
134+
}
135+
127136
/**
128137
* @return @see newConnectionPerTxn member docs for behavior.
129138
*/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package com.oltpbenchmark.benchmarks.dataloader;
2+
3+
/**
 * Plain value holder describing one table column: its name, data type,
 * maximum character length and identity flag. All fields are nullable and
 * are only read and written through the accessors below.
 */
public class Column {
    private String columnName;
    private String dataType;
    private Integer characterMaximumLength;
    private Boolean isIdentity;

    /** @return the column name, or {@code null} if not set */
    public String getColumnName() {
        return this.columnName;
    }

    /** @param columnName the column name to store */
    public void setColumnName(String columnName) {
        this.columnName = columnName;
    }

    /** @return the data type name, or {@code null} if not set */
    public String getDataType() {
        return this.dataType;
    }

    /** @param dataType the data type name to store */
    public void setDataType(String dataType) {
        this.dataType = dataType;
    }

    /** @return the maximum character length, or {@code null} if not set */
    public Integer getCharacterMaximumLength() {
        return this.characterMaximumLength;
    }

    /** @param characterMaximumLength the maximum character length to store */
    public void setCharacterMaximumLength(Integer characterMaximumLength) {
        this.characterMaximumLength = characterMaximumLength;
    }

    /** @return the identity flag, or {@code null} if not set */
    public Boolean getIsIdentity() {
        return this.isIdentity;
    }

    /** @param isIdentity the identity flag to store */
    public void setIsIdentity(Boolean isIdentity) {
        this.isIdentity = isIdentity;
    }

    /**
     * Renders every field in the form
     * {@code Column{columnName='..', dataType='..', characterMaximumLength=.., isIdentity=..}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Column{");
        text.append("columnName='").append(columnName).append('\'');
        text.append(", dataType='").append(dataType).append('\'');
        text.append(", characterMaximumLength=").append(characterMaximumLength);
        text.append(", isIdentity=").append(isIdentity);
        text.append('}');
        return text.toString();
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package com.oltpbenchmark.benchmarks.dataloader;
2+
3+
import com.oltpbenchmark.WorkloadConfiguration;
4+
import com.oltpbenchmark.api.BenchmarkModule;
5+
import com.oltpbenchmark.api.Loader;
6+
import com.oltpbenchmark.api.Worker;
7+
import com.oltpbenchmark.benchmarks.featurebench.FeatureBenchLoader;
8+
import org.apache.commons.configuration2.HierarchicalConfiguration;
9+
import org.apache.commons.configuration2.tree.ImmutableNode;
10+
11+
import java.io.FileInputStream;
12+
import java.io.FileNotFoundException;
13+
import java.io.IOException;
14+
import java.io.InputStream;
15+
import java.util.*;
16+
17+
public class DataGenerator extends BenchmarkModule {
18+
/**
19+
* Constructor!
20+
*
21+
* @param workConf
22+
*/
23+
public DataGenerator(WorkloadConfiguration workConf) {
24+
super(workConf);
25+
}
26+
27+
@Override
28+
protected List<Worker<? extends BenchmarkModule>> makeWorkersImpl() throws IOException {
29+
return null;
30+
}
31+
32+
@Override
33+
protected Loader<DataGenerator> makeLoaderImpl() {
34+
// load properties file
35+
36+
return new DataGeneratorLoader(this, getProperties("datatype-mapping.properties"),
37+
getProperties("pk-mapping.properties"), getFkProperties());
38+
}
39+
40+
@Override
41+
protected Package getProcedurePackageImpl() {
42+
return null;
43+
}
44+
45+
public Map<String, PropertyMapping> getProperties(String propertiesType) {
46+
Properties properties = new Properties();
47+
Map<String, PropertyMapping> propertyMap = new LinkedHashMap<>();
48+
final String path = "/benchmarks/" + getBenchmarkName() + "/" + propertiesType;
49+
50+
try (InputStream input = this.getClass().getResourceAsStream(path)) {
51+
properties.load(input);
52+
for (String key : properties.stringPropertyNames()) {
53+
String value = properties.getProperty(key);
54+
String[] parts = value.split(":");
55+
String className = parts[0];
56+
int numParams = Integer.parseInt(parts[1]);
57+
List<Object> params = new ArrayList<>();
58+
if (numParams > 0) {
59+
params.addAll(Arrays.asList(parts[2].split(",")));
60+
}
61+
62+
PropertyMapping property = new PropertyMapping(className, numParams, params);
63+
propertyMap.put(key, property);
64+
}
65+
} catch (IOException e) {
66+
throw new RuntimeException(e);
67+
}
68+
return propertyMap;
69+
}
70+
71+
public Map<String, FkPropertyMapping> getFkProperties() {
72+
Properties properties = new Properties();
73+
Map<String, FkPropertyMapping> propertyMap = new LinkedHashMap<>();
74+
final String path = "/benchmarks/" + getBenchmarkName() + "/fk-mapping.properties";
75+
76+
try (InputStream input = this.getClass().getResourceAsStream(path)) {
77+
properties.load(input);
78+
for (String key : properties.stringPropertyNames()) {
79+
String value = properties.getProperty(key);
80+
String[] parts = value.split(":");
81+
String className = parts[0];
82+
String dataType = parts[1];
83+
84+
FkPropertyMapping property = new FkPropertyMapping(className, dataType);
85+
propertyMap.put(key, property);
86+
}
87+
} catch (IOException e) {
88+
throw new RuntimeException(e);
89+
}
90+
return propertyMap;
91+
}
92+
}

0 commit comments

Comments
 (0)