
Commit 4f9b068

Add OAuth support in Databricks Loader
New config section for storage:

```
"storage" : {
  ...
  "oauth": {
    "clientId": "client-id"
    "clientSecret": ${OAUTH_CLIENT_SECRET}
  }
}
```

This allows setting the `clientId` and `clientSecret` authentication properties. The old-style `password` field (relying on personal access tokens) is still supported.
1 parent 618565a commit 4f9b068
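
For context, a minimal standalone sketch of the authentication selection this commit introduces. It mirrors the `setAuthProperties` method added to `StorageTarget.scala` below; the comments on the driver options are my reading of the Databricks JDBC settings, not part of the commit:

```scala
import java.util.Properties

final case class OAuth(clientId: String, clientSecret: String)

// Choose JDBC connection properties depending on whether OAuth is configured.
def authProperties(oauth: Option[OAuth]): Properties = {
  val props = new Properties()
  oauth match {
    case Some(o) =>
      props.put("AuthMech", 11)                // OAuth 2.0
      props.put("Auth_Flow", 1)                // client-credentials (machine-to-machine) flow
      props.put("OAuth2ClientId", o.clientId)
      props.put("OAuth2Secret", o.clientSecret)
    case None =>
      props.put("AuthMech", 3)                 // legacy personal access token ("token" user + password)
  }
  props
}
```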


13 files changed: +114 -33


.github/workflows/ci.yml (+1 -1)

```diff
@@ -55,7 +55,7 @@ jobs:
           java-version: 11
       - name: Get the latest Databricks JDBC driver
         run: |
-          curl https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.34/DatabricksJDBC42-2.6.34.1058.zip --output DatabricksJDBC42.jar.zip
+          curl https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.40/DatabricksJDBC42-2.6.40.1070.zip --output DatabricksJDBC42.jar.zip
           unzip DatabricksJDBC42.jar.zip
           cp ./*/DatabricksJDBC42.jar . # 2.6.34 download changes directory structure - grab the jar from nested directory (which has entropy in its name)
       - name: Docker login
```

config/loader/aws/databricks.config.reference.hocon (+4)

```diff
@@ -24,6 +24,10 @@
         "parameterName": "snowplow.databricks.password"
       }
     },
+    "oauth": {
+      "clientId": "client-id"
+      "clientSecret": ${OAUTH_CLIENT_SECRET}
+    },
     # Optional. Override the Databricks default catalog, e.g. with a Unity catalog name.
     "catalog": "hive_metastore",
     # DB schema
```

config/loader/azure/databricks.config.reference.hocon (+4)

```diff
@@ -43,6 +43,10 @@
         "parameterName": "snowplow.databricks.password"
       }
     },
+    "oauth": {
+      "clientId": "client-id"
+      "clientSecret": ${OAUTH_CLIENT_SECRET}
+    },
     # Optional. Override the Databricks default catalog, e.g. with a Unity catalog name.
     "catalog": "hive_metastore",
     # DB schema
```

config/loader/gcp/databricks.config.reference.hocon (+4)

```diff
@@ -21,6 +21,10 @@
         "parameterName": "snowplow.databricks.password"
       }
     },
+    "oauth": {
+      "clientId": "client-id"
+      "clientSecret": ${OAUTH_CLIENT_SECRET}
+    },
     # Optional. Override the Databricks default catalog, e.g. with a Unity catalog name.
     "catalog": "hive_metastore",
     # DB schema
```
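
In all three reference configs, `${OAUTH_CLIENT_SECRET}` is a HOCON substitution rather than a literal. Below is a small sketch of how such a value resolves, assuming the standard Typesafe Config behaviour of falling back to environment variables for undefined substitutions (which is what the `Test / envVars` addition in `BuildSettings.scala` further down relies on); this check is illustrative and not part of the commit:

```scala
import com.typesafe.config.ConfigFactory

// Hypothetical check: with OAUTH_CLIENT_SECRET exported in the environment,
// the substitution resolves to that value when the config is resolved.
val cfg = ConfigFactory.parseString(
  """
    |"oauth": {
    |  "clientId": "client-id"
    |  "clientSecret": ${OAUTH_CLIENT_SECRET}
    |}
    |""".stripMargin
).resolve() // default resolve options consult system environment variables

println(cfg.getString("oauth.clientSecret")) // prints the environment variable's value
```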

modules/databricks-loader/src/test/scala/com/snowplowanalytics/snowplow/loader/databricks/ConfigSpec.scala (+8 -4)

```diff
@@ -112,7 +112,8 @@ class ConfigSpec extends Specification {
       val result = getConfigFromResource("/loader/aws/databricks.config.minimal.hocon", testParseConfig)
       val storage = ConfigSpec.exampleStorage.copy(
         catalog = None,
-        password = StorageTarget.PasswordConfig.PlainText("Supersecret1")
+        password = Some(StorageTarget.PasswordConfig.PlainText("Supersecret1")),
+        oauth = None
       )
       val cloud = Config.Cloud.AWS(RegionSpec.DefaultTestRegion, exampleMessageQueue.copy(region = Some(RegionSpec.DefaultTestRegion)))
       val retries = exampleRetries.copy(cumulativeBound = Some(20.minutes))
@@ -140,7 +141,8 @@
       val result = getConfigFromResource("/loader/gcp/databricks.config.minimal.hocon", testParseConfig)
       val storage = ConfigSpec.exampleStorage.copy(
         catalog = None,
-        password = StorageTarget.PasswordConfig.PlainText("Supersecret1")
+        password = Some(StorageTarget.PasswordConfig.PlainText("Supersecret1")),
+        oauth = None
       )
       val retries = exampleRetries.copy(cumulativeBound = Some(20.minutes))
       val readyCheck = exampleReadyCheck.copy(strategy = Config.Strategy.Constant, backoff = 15.seconds)
@@ -167,7 +169,8 @@
       val result = getConfigFromResource("/loader/azure/databricks.config.minimal.hocon", testParseConfig)
       val storage = ConfigSpec.exampleStorage.copy(
         catalog = None,
-        password = StorageTarget.PasswordConfig.PlainText("Supersecret1")
+        password = Some(StorageTarget.PasswordConfig.PlainText("Supersecret1")),
+        oauth = None
       )
       val retries = exampleRetries.copy(cumulativeBound = Some(20.minutes))
       val readyCheck = exampleReadyCheck.copy(strategy = Config.Strategy.Constant, backoff = 15.seconds)
@@ -200,7 +203,8 @@ object ConfigSpec {
     "atomic",
     443,
     "/databricks/http/path",
-    StorageTarget.PasswordConfig.EncryptedKey(StorageTarget.EncryptedConfig("snowplow.databricks.password")),
+    Some(StorageTarget.PasswordConfig.EncryptedKey(StorageTarget.EncryptedConfig("snowplow.databricks.password"))),
+    Some(StorageTarget.Databricks.OAuth("client-id", "client-secret")),
     None,
     "snowplow-rdbloader-oss",
     StorageTarget.LoadAuthMethod.NoCreds,
```

modules/databricks-loader/src/test/scala/com/snowplowanalytics/snowplow/loader/databricks/DatabricksSpec.scala (+2 -1)

```diff
@@ -227,7 +227,8 @@ object DatabricksSpec {
     "snowplow",
     443,
     "some/path",
-    StorageTarget.PasswordConfig.PlainText("xxx"),
+    Some(StorageTarget.PasswordConfig.PlainText("xxx")),
+    None,
     None,
     "useragent",
     StorageTarget.LoadAuthMethod.NoCreds,
```

modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/config/Config.scala (+4 -3)

```diff
@@ -348,9 +348,10 @@ object Config {
   private def azureVaultCheck(config: Config[StorageTarget]): List[String] =
     config.cloud match {
       case c: Config.Cloud.Azure if c.azureVaultName.isEmpty =>
-        (config.storage.password, config.storage.sshTunnel.flatMap(_.bastion.key)) match {
-          case (_: StorageTarget.PasswordConfig.EncryptedKey, _) | (_, Some(_)) => List("Azure vault name is needed")
-          case _ => Nil
+        (config.storage.credentials, config.storage.sshTunnel.flatMap(_.bastion.key)) match {
+          case (Some(StorageTarget.Credentials(_, _: StorageTarget.PasswordConfig.EncryptedKey)), _) | (_, Some(_)) =>
+            List("Azure vault name is needed")
+          case _ => Nil
         }
       case _ => Nil
     }
```

modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/config/StorageTarget.scala (+32 -5)

```diff
@@ -32,8 +32,7 @@ import scala.concurrent.duration.{Duration, FiniteDuration}
   */
 sealed trait StorageTarget extends Product with Serializable {
   def schema: String
-  def username: String
-  def password: StorageTarget.PasswordConfig
+  def credentials: Option[StorageTarget.Credentials]
   def sshTunnel: Option[StorageTarget.TunnelConfig]

   def doobieCommitStrategy(timeouts: Config.Timeouts): Strategy = Transaction.defaultStrategy(timeouts)
@@ -80,6 +79,9 @@ object StorageTarget {
     sshTunnel: Option[TunnelConfig],
     loadAuthMethod: LoadAuthMethod
   ) extends StorageTarget {
+
+    override def credentials: Option[Credentials] = Some(Credentials(username, password))
+
     override def driver: String = "com.amazon.redshift.jdbc42.Driver"

     override def connectionUrl: String = s"jdbc:redshift://$host:$port/$database"
@@ -107,15 +109,17 @@
     schema: String,
     port: Int,
     httpPath: String,
-    password: PasswordConfig,
+    password: Option[PasswordConfig],
+    oauth: Option[Databricks.OAuth],
     sshTunnel: Option[TunnelConfig],
     userAgent: String,
     loadAuthMethod: LoadAuthMethod,
     eventsOptimizePeriod: FiniteDuration,
     logLevel: Int
   ) extends StorageTarget {

-    override def username: String = "token"
+    override def credentials: Option[Credentials] =
+      password.map(configuredPassword => Credentials(username = "token", password = configuredPassword))

     override def driver: String = "com.databricks.client.jdbc.Driver"

@@ -130,18 +134,37 @@
       props.put("httpPath", httpPath)
       props.put("ssl", 1)
       props.put("LogLevel", logLevel)
-      props.put("AuthMech", 3)
       props.put("transportMode", "http")
       props.put("UserAgentEntry", userAgent)
+      setAuthProperties(props)
       props
     }

+    private def setAuthProperties(props: Properties) =
+      oauth match {
+        case Some(configuredOAuth) =>
+          props.put("AuthMech", 11)
+          props.put("Auth_Flow", 1)
+          props.put("OAuth2ClientId", configuredOAuth.clientId)
+          props.put("OAuth2Secret", configuredOAuth.clientSecret)
+        case None =>
+          // When no OAuth use default, legacy personal access tokens (represented by 'Credentials' class)
+          props.put("AuthMech", 3)
+      }
+
     override def eventsLoadAuthMethod: LoadAuthMethod = loadAuthMethod
     override def foldersLoadAuthMethod: LoadAuthMethod = loadAuthMethod

     override def reportRecoveryTableMetrics: Boolean = false
   }

+  object Databricks {
+    final case class OAuth(clientId: String, clientSecret: String)
+
+    implicit def oauthConfigDecoder: Decoder[OAuth] =
+      deriveDecoder[OAuth]
+  }
+
   final case class Snowflake(
     snowflakeRegion: Option[String],
     username: String,
@@ -159,6 +182,8 @@
     readyCheck: Snowflake.ReadyCheck
   ) extends StorageTarget {

+    override def credentials: Option[Credentials] = Some(Credentials(username, password))
+
     override def connectionUrl: String =
       host match {
         case Right(h) =>
@@ -352,6 +377,8 @@
   /** Destination socket for SSH tunnel - usually DB socket inside private network */
   final case class DestinationConfig(host: String, port: Int)

+  final case class Credentials(username: String, password: PasswordConfig)
+
   /**
    * ADT representing fact that password can be either plain-text or encrypted in EC2 Parameter
    * Store or GCP Secret Manager
```
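
A usage sketch (not from the commit) of the derived decoder above, assuming circe's `io.circe.parser.decode` and the implicit `oauthConfigDecoder` are in scope:

```scala
import io.circe.parser.decode
import com.snowplowanalytics.snowplow.rdbloader.config.StorageTarget

// Decodes the new "oauth" block into the OAuth case class introduced by this commit.
val parsed = decode[StorageTarget.Databricks.OAuth](
  """{"clientId": "client-id", "clientSecret": "client-secret"}"""
)
// parsed == Right(StorageTarget.Databricks.OAuth("client-id", "client-secret"))
```

Note that when only `oauth` is configured (no `password`), the Databricks target's `credentials` is `None`, so no username/password pair is handed to the connection pool; authentication happens entirely through the driver properties.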

modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/dsl/Environment.scala (+9 -2)

```diff
@@ -95,8 +95,7 @@ object Environment {
       httpClient <- BlazeClientBuilder[F].withExecutionContext(global.compute).resource
       implicit0(logger: Logger[F]) = Slf4jLogger.getLogger[F]
       iglu <- Iglu.igluInterpreter(httpClient, cli.resolverConfig)
-      implicit0(logging: Logging[F]) =
-        Logging.loggingInterpreter[F](List(cli.config.storage.password.getUnencrypted, cli.config.storage.username))
+      implicit0(logging: Logging[F]) = getLoggingInterpreter[F](cli.config)
       implicit0(random: Random[F]) <- Resource.eval(Random.scalaUtilRandom[F])
       tracker <- Monitoring.initializeTracking[F](cli.config.monitoring, httpClient)
       sentry <- Sentry.init[F](cli.config.monitoring.sentry.map(_.dsn))
@@ -148,6 +147,14 @@
         telemetry
       )

+  private def getLoggingInterpreter[F[_]: Async](config: Config[StorageTarget]): Logging[F] = {
+    val stopWords = config.storage.credentials match {
+      case Some(configuredCredentials) => List(configuredCredentials.password.getUnencrypted, configuredCredentials.username)
+      case None => List.empty
+    }
+    Logging.loggingInterpreter[F](stopWords)
+  }
+
   def createCloudServices[F[_]: Async: Logger: Cache](
     config: Config[StorageTarget],
     control: Control[F]
```

modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/dsl/Transaction.scala (+32 -14)

```diff
@@ -74,12 +74,13 @@ trait Transaction[F[_], C[_]] {
 object Transaction {

   /** Should be enough for all monitoring and loading */
-  val PoolSize = 4
+  private val PoolSize = 4

   def apply[F[_], C[_]](implicit ev: Transaction[F, C]): Transaction[F, C] = ev

-  def configureHikari[F[_]: Sync](target: StorageTarget, ds: HikariConfig): F[Unit] =
+  private def configureHikari[F[_]: Sync: SecretStore](target: StorageTarget, ds: HikariConfig): F[Unit] =
     Sync[F].delay {
+      ds.setJdbcUrl(target.connectionUrl)
       ds.setAutoCommit(target.withAutoCommit)
       ds.setMaximumPoolSize(PoolSize)

@@ -97,27 +98,44 @@
       ds.setMinimumIdle(0)

       ds.setDataSourceProperties(target.properties)
-    }
+    } *> setJdbcCredentials[F](target, ds)

-  def buildPool[F[_]: Async: SecretStore: Logging: Sleep](
+  private def buildPool[F[_]: Async: SecretStore: Logging: Sleep](
     target: StorageTarget,
     retries: Config.Retries
   ): Resource[F, Transactor[F]] =
     for {
-      ce <- ExecutionContexts.fixedThreadPool[F](2)
-      password <- target.password match {
-                    case StorageTarget.PasswordConfig.PlainText(text) =>
-                      Resource.pure[F, String](text)
-                    case StorageTarget.PasswordConfig.EncryptedKey(StorageTarget.EncryptedConfig(parameterName)) =>
-                      Resource.eval(SecretStore[F].getValue(parameterName))
-                  }
-      xa <- HikariTransactor
-              .newHikariTransactor[F](target.driver, target.connectionUrl, target.username, password, ce)
-      _ <- Resource.eval(xa.configure(configureHikari[F](target, _)))
+      xa <- getTransactor(target)
       xa <- Resource.pure(RetryingTransactor.wrap(retries, xa))
       xa <- target.sshTunnel.fold(Resource.pure[F, Transactor[F]](xa))(SSH.transactor(_, xa))
     } yield xa

+  private def getTransactor[F[_]: Async: SecretStore](target: StorageTarget): Resource[F, HikariTransactor[F]] =
+    for {
+      ec <- ExecutionContexts.fixedThreadPool[F](2)
+      _ <- Resource.eval(Async[F].delay(Class.forName(target.driver)))
+      xa <- HikariTransactor.initial[F](ec)
+      _ <- Resource.eval(xa.configure(configureHikari[F](target, _)))
+    } yield xa
+
+  private def setJdbcCredentials[F[_]: Sync: SecretStore](target: StorageTarget, ds: HikariConfig): F[Unit] =
+    target.credentials match {
+      case Some(configuredCredentials) =>
+        getPassword[F](configuredCredentials).map { password =>
+          ds.setUsername(configuredCredentials.username)
+          ds.setPassword(password)
+        }
+      case None => Sync[F].unit
+    }
+
+  private def getPassword[F[_]: Sync: SecretStore](credentials: StorageTarget.Credentials): F[String] =
+    credentials.password match {
+      case StorageTarget.PasswordConfig.PlainText(text) =>
+        Sync[F].pure(text)
+      case StorageTarget.PasswordConfig.EncryptedKey(StorageTarget.EncryptedConfig(parameterName)) =>
+        SecretStore[F].getValue(parameterName)
+    }
+
   /**
    * Build a necessary (dry-run or real-world) DB interpreter as a `Resource`, which guarantees to
    * close a JDBC connection. If connection could not be acquired, it will retry several times
```
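
A rough sketch (assumptions, not commit code) of what the reworked `configureHikari`/`setJdbcCredentials` pair now produces for a Databricks target: the JDBC URL and the auth-related data source properties are always applied, while a username/password pair is only set for the legacy token mode:

```scala
import java.util.Properties
import com.zaxxer.hikari.HikariConfig

// `authProps` would come from target.properties (AuthMech 11 + OAuth2* keys, or AuthMech 3);
// `credentials` is Some(("token", <PAT>)) for the legacy mode and None for OAuth-only.
def databricksPoolConfig(
  jdbcUrl: String,
  authProps: Properties,
  credentials: Option[(String, String)]
): HikariConfig = {
  val ds = new HikariConfig()
  ds.setJdbcUrl(jdbcUrl) // now set in configureHikari rather than via newHikariTransactor
  ds.setDataSourceProperties(authProps)
  credentials.foreach { case (user, password) =>
    ds.setUsername(user)
    ds.setPassword(password)
  }
  ds
}
```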

modules/loader/src/test/scala/com/snowplowanalytics/snowplow/rdbloader/config/CliConfigSpec.scala (+1 -1)

```diff
@@ -90,7 +90,7 @@ class CliConfigSpec extends Specification {
       val result = CliConfig.parse[IO](cli).value.unsafeRunSync()

       result must beRight.like { case CliConfig(config, _, resolverConfig) =>
-        config.storage.password.getUnencrypted must beEqualTo("Supersecret password from substitution!")
+        config.storage.credentials.get.password.getUnencrypted must beEqualTo("Supersecret password from substitution!")
         resolverConfig must beEqualTo(expectedResolver)
       }
```

modules/loader/src/test/scala/com/snowplowanalytics/snowplow/rdbloader/config/StorageTargetSpec.scala (+6 -1)

```diff
@@ -162,6 +162,10 @@
         "port": 443,
         "httpPath": "http/path",
         "password": "Supersecret1",
+        "oauth": {
+          "clientId": "client-id",
+          "clientSecret": "client-secret"
+        },
         "userAgent": "snowplow-rdbloader-oss",
         "eventsOptimizePeriod": "2 days",
         "loadAuthMethod": {
@@ -177,7 +181,8 @@
       schema = "snowplow",
       port = 443,
       httpPath = "http/path",
-      password = StorageTarget.PasswordConfig.PlainText("Supersecret1"),
+      password = Some(StorageTarget.PasswordConfig.PlainText("Supersecret1")),
+      oauth = Some(StorageTarget.Databricks.OAuth("client-id", "client-secret")),
       sshTunnel = None,
       userAgent = "snowplow-rdbloader-oss",
       loadAuthMethod = StorageTarget.LoadAuthMethod.NoCreds,
```

project/BuildSettings.scala (+7 -1)

```diff
@@ -243,7 +243,13 @@ object BuildSettings {
     Docker / packageName := "rdb-loader-databricks",
     initialCommands := "import com.snowplowanalytics.snowplow.loader.databricks._",
     Compile / mainClass := Some("com.snowplowanalytics.snowplow.loader.databricks.Main"),
-    Compile / unmanagedJars += file("DatabricksJDBC42.jar")
+    Compile / unmanagedJars += file("DatabricksJDBC42.jar"),
+    // used in extended configuration parsing unit tests
+    Test / envVars := Map(
+      "OAUTH_CLIENT_SECRET" -> "client-secret"
+    ),
+    // envVars works only when fork is enabled
+    Test / fork := true
   ) ++ buildSettings ++ addExampleConfToTestCp ++ assemblySettings ++ dynVerSettings

   lazy val transformerBatchBuildSettings =
```
