Add storage class to save database history to bq (#77)
ismailsimsek committed Feb 9, 2023
1 parent f0ffaca commit d9bbd17
Showing 6 changed files with 502 additions and 23 deletions.
@@ -12,16 +12,17 @@

import java.io.FileInputStream;
import java.io.IOException;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import javax.enterprise.inject.Instance;
import javax.enterprise.inject.literal.NamedLiteral;

import com.google.api.gax.retrying.RetrySettings;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.*;
import org.eclipse.microprofile.config.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -102,5 +103,20 @@ public static BigQuery getBQClient(Optional<String> gcpProject, Optional<String>
.getService();

}

public static TableResult executeQuery(BigQuery bqClient, String query, List<QueryParameterValue> parameters) throws SQLException {
try {
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
.setPositionalParameters(parameters)
.build();
return bqClient.query(queryConfig);
} catch (BigQueryException | InterruptedException e) {
throw new SQLException(e);
}
}

public static TableResult executeQuery(BigQuery bqClient, String query) throws SQLException {
return BatchUtil.executeQuery(bqClient, query, null);
}

}
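
For reference, here is a minimal, hypothetical sketch of how the shared BatchUtil.executeQuery helpers introduced above might be called from sink code. The project, dataset, table, and id values are placeholders, and the fallback to application-default credentials when no credentials file is supplied is an assumption about getBQClient, whose body is not fully shown in this diff.

// Hypothetical usage of the BatchUtil helpers; all literal values below are placeholders.
import java.sql.SQLException;
import java.util.List;
import java.util.Optional;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.QueryParameterValue;
import com.google.cloud.bigquery.TableResult;

import io.debezium.server.bigquery.BatchUtil;

public class BatchUtilUsageExample {
  public static void main(String[] args) throws SQLException {
    // Build a client with the existing helper; no credentials file is supplied here,
    // assuming getBQClient then falls back to application-default credentials.
    BigQuery bqClient = BatchUtil.getBQClient(
        Optional.of("my-gcp-project"),   // placeholder project
        Optional.of("my_dataset"),       // placeholder dataset
        Optional.empty(),                // no explicit credentials file (assumption, see above)
        "US");                           // location, as passed by the callers in this commit

    // Parameterized query through the new positional-parameter overload.
    TableResult result = BatchUtil.executeQuery(bqClient,
        "SELECT id, history_data FROM my_dataset.debezium_database_history_storage WHERE id = ?",
        List.of(QueryParameterValue.string("some-record-id")));

    // Print the stored history documents returned by the query.
    result.getValues().forEach(row -> System.out.println(row.get("history_data").getStringValue()));
  }
}

The single-argument overload, BatchUtil.executeQuery(bqClient, query), covers DDL and other unparameterized statements, as the history and offset storage classes below use it.
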
@@ -0,0 +1,299 @@
/*
*
* * Copyright memiiso Authors.
* *
* * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
*
*/

package io.debezium.server.bigquery.history;

import io.debezium.DebeziumException;
import io.debezium.annotation.ThreadSafe;
import io.debezium.common.annotation.Incubating;
import io.debezium.config.Configuration;
import io.debezium.config.Field;
import io.debezium.document.DocumentReader;
import io.debezium.document.DocumentWriter;
import io.debezium.relational.history.*;
import io.debezium.server.bigquery.BatchUtil;
import io.debezium.util.Collect;
import io.debezium.util.FunctionalReadWriteLock;
import io.debezium.util.Strings;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.Collection;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;

import autovalue.shaded.com.google.common.collect.ImmutableList;
import com.google.cloud.bigquery.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A {@link DatabaseHistory} implementation that stores the schema history in a BigQuery table
*
* @author Ismail Simsek
*/
@ThreadSafe
@Incubating
public final class BigquerySchemaHistory extends AbstractDatabaseHistory {

private static final Logger LOG = LoggerFactory.getLogger(BigquerySchemaHistory.class);

public static final String DATABASE_HISTORY_TABLE_DDL = "CREATE TABLE IF NOT EXISTS %s " +
"(id STRING NOT NULL, " +
"history_data STRING, " +
"record_insert_ts TIMESTAMP NOT NULL " +
")";

public static final String DATABASE_HISTORY_STORAGE_TABLE_INSERT = "INSERT INTO %s VALUES ( ?, ?, ? )";
public static final String DATABASE_HISTORY_STORAGE_TABLE_SELECT = "SELECT id, history_data, record_insert_ts FROM %s ORDER BY " +
"record_insert_ts ASC";

static final Field SINK_TYPE_FIELD = Field.create("debezium.sink.type").required();
public static Collection<Field> ALL_FIELDS = Collect.arrayListOf(SINK_TYPE_FIELD);

private final FunctionalReadWriteLock lock = FunctionalReadWriteLock.reentrant();
private final DocumentWriter writer = DocumentWriter.defaultWriter();
private final DocumentReader reader = DocumentReader.defaultReader();
private final AtomicBoolean running = new AtomicBoolean();
BigquerySchemaHistoryConfig config;
BigQuery bqClient;
private String tableFullName;
private TableId tableId;

@Override
public void configure(Configuration config, HistoryRecordComparator comparator, DatabaseHistoryListener listener, boolean useCatalogBeforeSchema) {

super.configure(config, comparator, listener, useCatalogBeforeSchema);
this.config = new BigquerySchemaHistoryConfig(config);
try {
bqClient = BatchUtil.getBQClient(
Optional.ofNullable(this.config.getBigqueryProject()),
Optional.ofNullable(this.config.getBigqueryDataset()),
Optional.ofNullable(this.config.getBigqueryCredentialsFile()),
this.config.getBigqueryLocation()
);
tableFullName = String.format("%s.%s", this.config.getBigqueryDataset(), this.config.getBigqueryTable());
tableId = TableId.of(this.config.getBigqueryDataset(), this.config.getBigqueryTable());
} catch (Exception e) {
throw new DatabaseHistoryException("Failed to connect to the Bigquery database history backing store", e);
}

if (running.get()) {
throw new DatabaseHistoryException("Bigquery database history process already initialized for table: " + tableFullName);
}
}

@Override
public void start() {
super.start();
lock.write(() -> {
if (running.compareAndSet(false, true)) {
try {
if (!storageExists()) {
initializeStorage();
}
} catch (Exception e) {
throw new DatabaseHistoryException("Unable to create history table: " + tableFullName + " : " + e.getMessage(),
e);
}
}
});
}

public String getTableFullName() {
return tableFullName;
}

@Override
protected void storeRecord(HistoryRecord record) throws DatabaseHistoryException {
if (record == null) {
return;
}
lock.write(() -> {
if (!running.get()) {
throw new DebeziumException("The history has been stopped and will not accept more records");
}
try {
String recordDocString = writer.write(record.document());
LOG.trace("Saving history data {}", recordDocString);
Timestamp currentTs = new Timestamp(System.currentTimeMillis());
BatchUtil.executeQuery(bqClient,
String.format(DATABASE_HISTORY_STORAGE_TABLE_INSERT, tableFullName),
ImmutableList.of(
QueryParameterValue.string(UUID.randomUUID().toString()),
QueryParameterValue.string(recordDocString),
QueryParameterValue.timestamp(String.valueOf(currentTs))
)
);
LOG.trace("Successfully saved history data to bigquery table");
} catch (IOException | SQLException e) {
throw new DatabaseHistoryException("Failed to store record: " + record, e);
}
});
}

@Override
public void stop() {
running.set(false);
super.stop();
}

@Override
protected synchronized void recoverRecords(Consumer<HistoryRecord> records) {
lock.write(() -> {
try {
if (exists()) {
TableResult rs = BatchUtil.executeQuery(bqClient, String.format(DATABASE_HISTORY_STORAGE_TABLE_SELECT, tableFullName));
for (FieldValueList row : rs.getValues()) {
String line = row.get("history_data").getStringValue();
if (line == null) {
break;
}
if (!line.isEmpty()) {
records.accept(new HistoryRecord(reader.read(line)));
}
}
}
} catch (IOException | SQLException e) {
throw new DatabaseHistoryException("Failed to recover records", e);
}
});
}

@Override
public boolean storageExists() {
Table table = bqClient.getTable(tableId);
return table != null;
}

@Override
public boolean exists() {

if (!storageExists()) {
return false;
}

int numRows = 0;
try {
TableResult rs = BatchUtil.executeQuery(bqClient, "SELECT COUNT(*) as row_count FROM " + tableFullName);
for (FieldValueList row : rs.getValues()) {
numRows = row.get("row_count").getNumericValue().intValue();
break;
}
} catch (SQLException e) {
throw new DatabaseHistoryException("Failed to check database history storage", e);
}
return numRows > 0;
}

@Override
public String toString() {
return "Bigquery database history storage: " + (tableFullName != null ? tableFullName : "(unstarted)");
}

@Override
public void initializeStorage() {
if (!storageExists()) {
try {
LOG.debug("Creating table {} to store database history", tableFullName);
BatchUtil.executeQuery(bqClient, String.format(DATABASE_HISTORY_TABLE_DDL, tableFullName));
LOG.warn("Created database history storage table {} to store history", tableFullName);

if (!Strings.isNullOrEmpty(config.getMigrateHistoryFile().strip())) {
LOG.warn("Migrating history from file {}", config.getMigrateHistoryFile());
this.loadFileDatabaseHistory(new File(config.getMigrateHistoryFile()));
}
} catch (Exception e) {
throw new DatabaseHistoryException("Creation of database history table failed, please create the table manually", e);
}
} else {
LOG.debug("Storage already exists, skipping initialization");
}
}

private void loadFileDatabaseHistory(File file) {
LOG.warn("Migrating file database history from:'{}' to Bigquery database history storage: {}",
file.toPath(), tableFullName);
AtomicInteger numRecords = new AtomicInteger();
lock.write(() -> {
try (BufferedReader historyReader = Files.newBufferedReader(file.toPath())) {
while (true) {
String line = historyReader.readLine();
if (line == null) {
break;
}
if (!line.isEmpty()) {
this.storeRecord(new HistoryRecord(reader.read(line)));
numRecords.getAndIncrement();
}
}
} catch (IOException e) {
LOG.error("Failed to migrate history record from history file at {}", file.toPath(), e);
}
});
LOG.warn("Migrated {} database history records. " +
"File database history migration to Bigquery database history storage successfully completed", numRecords.get());
}

public static class BigquerySchemaHistoryConfig {
private final Configuration config;

public BigquerySchemaHistoryConfig(Configuration config) {

if (!config.validateAndRecord(ALL_FIELDS, LOG::error)) {
throw new DatabaseHistoryException(
"Error configuring an instance of " + getClass().getSimpleName() + "; check the logs for details");
}

this.config = config;
}

public String sinkType() {
String type = this.config.getString(SINK_TYPE_FIELD);
if (type == null) {
throw new DatabaseHistoryException("The config property debezium.sink.type is required " +
"but it could not be found in any config source");
}
return type;
}

public String getBigqueryProject() {
return this.config.getString(Field.create(String.format("debezium.sink.%s.project", this.sinkType())));
}

public String getBigqueryDataset() {
return this.config.getString(Field.create(String.format("debezium.sink.%s.dataset", this.sinkType())));
}

public String getBigqueryTable() {
return this.config.getString(Field.create("database.history.bigquery.table-name").withDefault(
"debezium_database_history_storage"));
}

public String getMigrateHistoryFile() {
return this.config.getString(Field.create("database.history.bigquery.migrate-history-file").withDefault(""));
}

public String getBigqueryCredentialsFile() {
return this.config.getString(Field.create(String.format("debezium.sink.%s.credentials-file", this.sinkType())).withDefault(""));
}

public String getBigqueryLocation() {
return this.config.getString(Field.create(String.format("debezium.sink.%s.location", this.sinkType())).withDefault("US"));
}
}

}
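
To show how the nested BigquerySchemaHistoryConfig above resolves its properties, here is a minimal, hypothetical wiring sketch using io.debezium.config.Configuration. The sink type "bigquerybatch" and all values are placeholders; only the property names come from the getters in the class.

// Hypothetical configuration for BigquerySchemaHistoryConfig; values are placeholders.
import io.debezium.config.Configuration;

import io.debezium.server.bigquery.history.BigquerySchemaHistory;

public class SchemaHistoryConfigExample {
  public static void main(String[] args) {
    Configuration config = Configuration.create()
        .with("debezium.sink.type", "bigquerybatch")                      // required by SINK_TYPE_FIELD
        .with("debezium.sink.bigquerybatch.project", "my-gcp-project")    // read by getBigqueryProject()
        .with("debezium.sink.bigquerybatch.dataset", "debezium_dataset")  // read by getBigqueryDataset()
        .with("debezium.sink.bigquerybatch.location", "US")               // read by getBigqueryLocation()
        .with("database.history.bigquery.table-name", "debezium_database_history_storage")
        .build();

    BigquerySchemaHistory.BigquerySchemaHistoryConfig historyConfig =
        new BigquerySchemaHistory.BigquerySchemaHistoryConfig(config);

    // configure() above combines dataset and table name into the fully qualified history table.
    System.out.println(historyConfig.getBigqueryDataset() + "." + historyConfig.getBigqueryTable());
  }
}
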
@@ -71,21 +71,6 @@ public BigqueryOffsetBackingStore() {
public String getTableFullName() {
return tableFullName;
}

private TableResult executeQuery(String query, List<QueryParameterValue> parameters) throws SQLException {
try {
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
.setPositionalParameters(parameters)
.build();
return bqClient.query(queryConfig);
} catch (BigQueryException | InterruptedException e) {
throw new SQLException(e);
}
}

private TableResult executeQuery(String query) throws SQLException {
return this.executeQuery(query, null);
}

@Override
public void configure(WorkerConfig config) {
@@ -124,7 +109,7 @@ private void initializeTable() throws SQLException {
Table table = bqClient.getTable(tableId);
if (table == null) {
LOG.debug("Creating table {} to store offset", tableFullName);
executeQuery(String.format(OFFSET_STORAGE_TABLE_DDL, tableFullName));
BatchUtil.executeQuery(bqClient, String.format(OFFSET_STORAGE_TABLE_DDL, tableFullName));
LOG.warn("Created offset storage table {} to store offset", tableFullName);

if (!Strings.isNullOrEmpty(config.getMigrateOffsetFile().strip())){
@@ -137,11 +122,11 @@ private void initializeTable() throws SQLException {
protected void save() {
LOG.debug("Saving offset data to bigquery table...");
try {
this.executeQuery(String.format(OFFSET_STORAGE_TABLE_DELETE, tableFullName));
BatchUtil.executeQuery(bqClient, String.format(OFFSET_STORAGE_TABLE_DELETE, tableFullName));
String dataJson = mapper.writeValueAsString(data);
LOG.debug("Saving offset data {}", dataJson);
Timestamp currentTs = new Timestamp(System.currentTimeMillis());
this.executeQuery(
BatchUtil.executeQuery(bqClient,
String.format(OFFSET_STORAGE_TABLE_INSERT, tableFullName),
ImmutableList.of(
QueryParameterValue.string(UUID.randomUUID().toString()),
@@ -159,7 +144,7 @@ protected void save() {
private void load() {
try {
String dataJsonString = null;
TableResult rs = this.executeQuery(String.format(OFFSET_STORAGE_TABLE_SELECT, tableFullName));
TableResult rs = BatchUtil.executeQuery(bqClient, String.format(OFFSET_STORAGE_TABLE_SELECT, tableFullName));
for (FieldValueList row : rs.getValues()) {
dataJsonString = row.get("offset_data").getStringValue();
break;