Skip to content

Commit 2a6404b

Browse files
author
EC2 Default User
committed
Adding BindingDB. Adding new RDS source format.
1 parent 304dcbd commit 2a6404b

18 files changed

+531
-45
lines changed

DeployChemblOpenTargetsEnv.sh

+3
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
npm run build
33
cdk bootstrap
44
currentPrincipalArn=$(aws sts get-caller-identity --query Arn --output text)
5+
#Just in case you are using an IAM role, we will switch the identity from your STS arn to the underlying role ARN.
6+
currentPrincipalArn=$(sed 's/\(sts\)\(.*\)\(assumed-role\)\(.*\)\(\/.*\)/iam\2role\4/' <<< $currentPrincipalArn)
57
# Persist the resolved principal ARN into cdk.json. The variable is quoted so an empty or whitespace-containing value cannot shift jq's arguments.
jq '.context.starterLakeFormationAdmin = $currentPrincipalArn' --arg currentPrincipalArn "$currentPrincipalArn" cdk.json > tmp.$$.json && mv tmp.$$.json cdk.json
68
cdk deploy BaselineStack --require-approval never
79
cdk deploy CoreDataLake --require-approval never
810
cdk deploy ChemblStack --require-approval never
911
cdk deploy OpenTargetsStack --require-approval never
12+
cdk deploy BindingDbStack --require-approval never
1013
cdk deploy AnalyticsStack --require-approval never

RODA_templates/empty.ts

Whitespace-only changes.

SynthRodaTemplates.sh

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
aws glue get-tables --database-name opentargets_1911_dl > RODA_templates/open_targets_1911_get_tables.json
2+
aws glue get-database --name opentargets_1911_dl > RODA_templates/open_targets_1911_get_database.json
3+
npm run build && cdk synth OpenTargetsRodaTemplate
4+
aws s3 cp cdk.out/OpenTargetsRodaTemplate.template.json s3://aws-roda-hcls-datalake/OpenTargetsRodaTemplate.json
5+
6+
7+
aws glue get-tables --database-name chembl_25_dl > RODA_templates/chembl_25_get_tables.json
8+
aws glue get-database --name chembl_25_dl > RODA_templates/chembl_25_get_database.json
9+
npm run build && cdk synth ChemblRodaTemplate
10+
aws s3 cp cdk.out/ChemblRodaTemplate.template.json s3://aws-roda-hcls-datalake/ChemblRodaTemplate.json
11+
12+
aws glue get-tables --database-name binding_db_dl > RODA_templates/binding_db_get_tables.json
13+
aws glue get-database --name binding_db_dl > RODA_templates/binding_db_get_database.json
14+
npm run build && cdk synth BindingDbRodaTemplate
15+
aws s3 cp cdk.out/BindingDbRodaTemplate.template.json s3://aws-roda-hcls-datalake/BindingDbRodaTemplate.json
16+
17+
#https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Faws-roda-hcls-datalake.s3.amazonaws.com%2FChemblRodaTemplate.json&stackName=Chembl25-RODA
18+
#https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Faws-roda-hcls-datalake.s3.amazonaws.com%2FOpenTargetsRodaTemplate.json&stackName=OpenTargets-1911-RODA
19+
#https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/quickcreate?templateUrl=https%3A%2F%2Faws-roda-hcls-datalake.s3.amazonaws.com%2FBindingDbRodaTemplate.json&stackName=BindingDB-RODA

bin/aws.ts

+31-8
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@ import { BaselineStack } from '../lib/baseline-stack';
55
import { DataLakeStack } from '../lib/stacks/datalake-stack';
66
import { OpenTargetsStack } from '../lib/opentargets-stack';
77
import { ChemblStack } from '../lib/chembl-25-stack';
8-
import { AnalyticsStack } from '../lib/analytics-stack.js';
8+
import { BindingDBStack } from '../lib/bindingdb-stack';
9+
import { AnalyticsStack } from '../lib/analytics-stack';
910
import iam = require('@aws-cdk/aws-iam');
1011
import s3 = require('@aws-cdk/aws-s3');
1112
import { DataLakeEnrollment } from '../lib/constructs/data-lake-enrollment';
13+
import { DataSetTemplateStack } from '../lib/stacks/dataset-stack';
14+
1215

1316
const app = new cdk.App();
1417
const baseline = new BaselineStack(app, 'BaselineStack');
@@ -31,22 +34,42 @@ const openTargetsStack = new OpenTargetsStack(app, 'OpenTargetsStack', {
3134
DataLake: coreDataLake
3235
});
3336

37+
console.log('basic before binding dbstack');
38+
39+
const bindingDBStack = new BindingDBStack(app, 'BindingDbStack', {
40+
database: baseline.BindingDb,
41+
accessSecurityGroup: baseline.BindingDBAccessSg,
42+
databaseSecret: baseline.BindingDBSecret,
43+
DataLake: coreDataLake
44+
});
45+
3446
const analyticsStack = new AnalyticsStack(app, 'AnalyticsStack', {
3547
targetVpc: baseline.Vpc,
3648
});
3749

3850

51+
// chemblStack.grantIamRead(analyticsStack.NotebookRole);
52+
// openTargetsStack.grantIamRead(analyticsStack.NotebookRole);
53+
// bindingDBStack.grantIamRead(analyticsStack.NotebookRole);
3954

4055

56+
const OpenTargetsRodaTemplate = new DataSetTemplateStack(app, 'OpenTargetsRodaTemplate', {
57+
DatabaseDescriptionPath: "../../RODA_templates/open_targets_1911_get_database.json",
58+
DescribeTablesPath: "../../RODA_templates/open_targets_1911_get_tables.json",
59+
DataSetName: openTargetsStack.Enrollment.DataSetName
60+
});
4161

42-
chemblStack.grantIamRead(analyticsStack.NotebookRole);
43-
openTargetsStack.grantIamRead(analyticsStack.NotebookRole);
44-
45-
46-
47-
48-
62+
const ChemblRodaTemplate = new DataSetTemplateStack(app, 'ChemblRodaTemplate', {
63+
DatabaseDescriptionPath: "../../RODA_templates/chembl_25_get_database.json",
64+
DescribeTablesPath: "../../RODA_templates/chembl_25_get_tables.json",
65+
DataSetName: chemblStack.Enrollment.DataSetName
66+
});
4967

68+
// Template stack for the BindingDB data set, built from the Glue database/table descriptions stored under RODA_templates.
const BindingDbRodaTemplate = new DataSetTemplateStack(app, 'BindingDbRodaTemplate', {
69+
DatabaseDescriptionPath: "../../RODA_templates/binding_db_get_database.json",
70+
DescribeTablesPath: "../../RODA_templates/binding_db_get_tables.json",
71+
DataSetName: bindingDBStack.Enrollment.DataSetName
72+
});
5073

5174

5275
// const exampleUser = iam.User.fromUserName(coreDataLake, 'exampleGrantee', 'paul1' );

lib/baseline-stack.ts

+118
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import iam = require('@aws-cdk/aws-iam');
44
import rds = require('@aws-cdk/aws-rds');
55
import ssm = require('@aws-cdk/aws-ssm');
66
import s3 = require('@aws-cdk/aws-s3');
7+
import s3assets = require('@aws-cdk/aws-s3-assets');
78
import fs = require('fs');
89

910

@@ -16,6 +17,10 @@ export class BaselineStack extends cdk.Stack {
1617
public readonly chemblDBSecret: rds.DatabaseSecret;
1718
public readonly OpenTargetsSourceBucket: s3.Bucket;
1819
public readonly Vpc: ec2.Vpc;
20+
public readonly BindingDBSourceBucket: s3.Bucket;
21+
public readonly BindingDb: rds.DatabaseInstance;
22+
public readonly BindingDBAccessSg: ec2.SecurityGroup;
23+
public readonly BindingDBSecret: rds.DatabaseSecret;
1924

2025
constructor(scope: cdk.Construct, id: string, props?: cdk.StackProps) {
2126

@@ -166,6 +171,119 @@ export class BaselineStack extends cdk.Stack {
166171
openTargetsSourceFileTargetBucketLocation: [openTargetsBucket.bucketName]
167172
});
168173

174+
//// Start Binding DB ////
175+
176+
const bindingDbAccessSg = new ec2.SecurityGroup(this, 'bindingDbAccessSg', {
177+
vpc: baselineVpc,
178+
allowAllOutbound: true,
179+
description: "Grants access to the BindingDB rds instance",
180+
securityGroupName: "BindingDBAccessSecurityGroup"
181+
});
182+
183+
this.BindingDBSourceBucket = new s3.Bucket(this, 'BindingDbSourceBucket');
184+
185+
this.BindingDBAccessSg = bindingDbAccessSg;
186+
187+
const bindingDbSg = new ec2.SecurityGroup(this, 'bindingDbSg', {
188+
vpc: baselineVpc,
189+
allowAllOutbound: true,
190+
description: "Security group for binding dbs",
191+
securityGroupName: "BindingDbSecurityGroup"
192+
});
193+
194+
195+
196+
197+
bindingDbAccessSg.addIngressRule( bindingDbAccessSg , ec2.Port.allTraffic(), "Recursive SG rule for Glue" );
198+
199+
// Oracle's listener default is 1521 (the previous 1512 was a digit transposition); allow the access security group to reach it.
bindingDbSg.addIngressRule( bindingDbAccessSg , ec2.Port.tcp(1521), "Gives BindingDB access security group access to oracle port" );
200+
201+
importInstance.addSecurityGroup(bindingDbAccessSg);
202+
203+
204+
const bindingDBSecret = new rds.DatabaseSecret(this, 'bindingDbSecret', {
205+
username: 'master',
206+
});
207+
this.BindingDBSecret = bindingDBSecret;
208+
209+
bindingDBSecret.grantRead(importInstanceRole);
210+
this.BindingDBSourceBucket.grantReadWrite(importInstanceRole);
211+
212+
213+
const bindingDbOptionGroup = new rds.OptionGroup(this, 'bindingDbRdsOptionGroup',{
214+
engine: rds.DatabaseInstanceEngine.oracleSe2({
215+
version: rds.OracleEngineVersion.VER_19, // different version class for each engine type
216+
}),
217+
description: "Binding DB Option Group",
218+
configurations: [{
219+
name: "S3_INTEGRATION",
220+
version: "1.0"
221+
}],
222+
});
223+
224+
const bindingDb = new rds.DatabaseInstance(this, 'bindingDb', {
225+
engine: rds.DatabaseInstanceEngine.ORACLE_SE2,
226+
masterUsername: 'master',
227+
licenseModel: rds.LicenseModel.BRING_YOUR_OWN_LICENSE,
228+
vpc: baselineVpc,
229+
vpcPlacement: appSubnetSelection,
230+
optionGroup: bindingDbOptionGroup,
231+
instanceType: ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3, ec2.InstanceSize.SMALL),
232+
instanceIdentifier: 'binding-db',
233+
masterUserPassword: bindingDBSecret.secretValueFromJson('password'),
234+
securityGroups: [bindingDbSg, bindingDbAccessSg],
235+
deletionProtection: false,
236+
});
237+
this.BindingDb = bindingDb;
238+
239+
// Underlying CfnDBInstance of the BindingDB instance; never reassigned, so declared const.
const bindingDbCfnDb = this.BindingDb.node.defaultChild as rds.CfnDBInstance;
240+
241+
const bindingDbRdsImportRole = new iam.Role(this, 'BindingDbRdsInstanceRole', {
242+
assumedBy: new iam.ServicePrincipal('rds.amazonaws.com')
243+
});
244+
this.BindingDBSourceBucket.grantReadWrite(bindingDbRdsImportRole);
245+
246+
bindingDbCfnDb.associatedRoles = [{
247+
featureName: "S3_INTEGRATION",
248+
roleArn: bindingDbRdsImportRole.roleArn
249+
}];
250+
251+
252+
const loadBindingDbDoc = new ssm.CfnDocument(this, 'loadBindingDbDoc', {
253+
content: JSON.parse(fs.readFileSync('scripts/ssmdoc.importbindingdb.json', { encoding: 'utf-8' })),
254+
documentType: "Command"
255+
});
256+
257+
258+
const instantClientBasic = new s3assets.Asset(this, `instantClientBasicRpm`, {
259+
path: "oracle-instantclient19.8-basic-19.8.0.0.0-1.x86_64.rpm"
260+
});
261+
instantClientBasic.grantRead(importInstanceRole);
262+
const instantClientSqlPlus = new s3assets.Asset(this, `instantClientSqlPlusRpm`, {
263+
path: "oracle-instantclient19.8-sqlplus-19.8.0.0.0-1.x86_64.rpm"
264+
});
265+
instantClientSqlPlus.grantRead(importInstanceRole);
266+
267+
268+
const loadBindingDbAssociation = new ssm.CfnAssociation(this, 'loadBindingDbAssociation',{
269+
name: loadBindingDbDoc.ref,
270+
targets: [
271+
{ key: "InstanceIds", values: [importInstance.instanceId] }
272+
]
273+
});
274+
275+
loadBindingDbAssociation.addPropertyOverride('Parameters',{
276+
databaseSecretArn: [this.BindingDBSecret.secretArn],
277+
databaseHostName: [this.BindingDb.dbInstanceEndpointAddress],
278+
databaseDmpS3Location: [this.BindingDBSourceBucket.bucketName],
279+
instantClientBasicS3Path: [instantClientBasic.s3ObjectUrl],
280+
instantClientSqlPlusS3Path: [instantClientSqlPlus.s3ObjectUrl],
281+
executionTimeout: ['7200']
282+
});
283+
284+
//// End Binding DB ////
169285

170286
}
287+
288+
171289
}

lib/bindingdb-stack.ts

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import * as cdk from '@aws-cdk/core';
2+
import ec2 = require('@aws-cdk/aws-ec2');
3+
import iam = require('@aws-cdk/aws-iam');
4+
import rds = require('@aws-cdk/aws-rds');
5+
import glue = require('@aws-cdk/aws-glue');
6+
import s3 = require('@aws-cdk/aws-s3');
7+
import s3assets = require('@aws-cdk/aws-s3-assets');
8+
import { RDSdataSetSetEnrollmentProps, RDSOracleDataSetEnrollment } from './constructs/rds-data-set-enrollment';
9+
import { DataSetStack, DataSetStackProps} from './stacks/dataset-stack';
10+
11+
12+
13+
14+
export interface BindingDBEnrollmentProps extends DataSetStackProps {
15+
databaseSecret: rds.DatabaseSecret;
16+
database: rds.DatabaseInstance;
17+
accessSecurityGroup: ec2.SecurityGroup;
18+
}
19+
20+
/**
 * Data set stack for BindingDB.
 *
 * Sets `this.Enrollment` to an `RDSOracleDataSetEnrollment` built from the
 * database, secret and access security group supplied in props, together with
 * the data lake bucket taken from `props.DataLake`.
 */
export class BindingDBStack extends DataSetStack{
21+
/**
 * @param scope Parent construct, forwarded to DataSetStack.
 * @param id Construct id, forwarded to DataSetStack.
 * @param props Database, secret and access security group, plus the inherited DataSetStackProps.
 */
constructor(scope: cdk.Construct, id: string, props: BindingDBEnrollmentProps) {
22+
super(scope, id, props);
23+
24+
25+
// Used as the enrollment's DataSetName and inside the --DL_PREFIX Glue argument below.
const dataSetName = "binding_db";
26+
27+
this.Enrollment = new RDSOracleDataSetEnrollment(this, 'binding-db-enrollment', {
28+
databaseSecret: props.databaseSecret,
29+
database: props.database,
30+
// NOTE(review): presumably the Oracle SID/service name of the imported database — confirm against the import script.
databaseSidOrServiceName: "orcl",
31+
accessSecurityGroup: props.accessSecurityGroup,
32+
dataLakeBucket: props.DataLake.DataLakeBucket,
33+
DataSetName: dataSetName,
34+
JdbcTargetIncludePaths: ["orcl/%"],
35+
GlueScriptPath: "scripts/glue.s3import.bindingdb.py",
36+
// Arguments handed to the Glue script named above.
GlueScriptArguments: {
37+
"--job-language": "python",
38+
"--job-bookmark-option": "job-bookmark-disable",
39+
"--enable-metrics": "",
40+
"--DL_BUCKET": props.DataLake.DataLakeBucket.bucketName,
41+
"--DL_PREFIX": "/"+dataSetName+"/",
42+
"--DL_REGION": cdk.Stack.of(this).region,
43+
"--GLUE_SRC_DATABASE": "binding_db_src"
44+
}
45+
});
46+
47+
48+
}
49+
}
50+
51+
52+
53+

lib/chembl-25-stack.ts

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export class ChemblStack extends DataSetStack{
2828
this.Enrollment = new RDSPostgresDataSetEnrollment(this, 'chembl-25-enrollment', {
2929
databaseSecret: props.databaseSecret,
3030
database: props.database,
31+
databaseSidOrServiceName: "chembl_25",
3132
accessSecurityGroup: props.accessSecurityGroup,
3233
dataLakeBucket: props.DataLake.DataLakeBucket,
3334
DataSetName: dataSetName,

lib/constructs/data-lake-enrollment.ts

+4-2
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,11 @@ export class DataLakeEnrollment extends cdk.Construct {
398398

399399

400400
const resolvedPrincipalType = this.determinePrincipalType(principal);
401-
401+
402+
403+
402404
if(resolvedPrincipalType === iam.Role){
405+
console.log("coarseAthenaAccessPolicy");
403406
this.CoarseAthenaAccessPolicy.attachToRole(principal as iam.Role);
404407
this.CoarseResourceAccessPolicy.attachToRole(principal as iam.Role);
405408
this.CoarseIamPolciesApplied = true;
@@ -416,7 +419,6 @@ export class DataLakeEnrollment extends cdk.Construct {
416419

417420

418421

419-
420422
}
421423

422424

0 commit comments

Comments
 (0)