This repository was archived by the owner on Jun 20, 2025. It is now read-only.

Commit 9207225

marksliva authored and facebook-github-bot committed
Move data bucket creation out of the data_ingestion TF (#1530)
Summary:
Pull Request resolved: #1530

The data bucket creation was previously bundled into the data_ingestion Terraform template. That bundling prevented limiting S3 access to only the necessary bucket for the PCE; the access restriction will be added in a follow-up.

Reviewed By: ankushksingh

Differential Revision: D39197009

fbshipit-source-id: 0f21427da1c90e2b54807ec030fbe29244dbe981
1 parent 7cf948f commit 9207225

File tree: 5 files changed, +15 −66 lines

fbpcs/infra/cloud_bridge/data_ingestion/main.tf
fbpcs/infra/cloud_bridge/data_ingestion/output.tf
fbpcs/infra/cloud_bridge/data_ingestion/variable.tf
fbpcs/infra/cloud_bridge/deploy.sh
fbpcs/infra/cloud_bridge/util.sh

fbpcs/infra/cloud_bridge/data_ingestion/main.tf

Lines changed: 1 addition & 39 deletions
@@ -34,7 +34,7 @@ resource "aws_kinesis_firehose_delivery_stream" "extended_s3_stream" {
 
   extended_s3_configuration {
     role_arn = aws_iam_role.firehose_role.arn
-    bucket_arn = aws_s3_bucket.bucket.arn
+    bucket_arn = var.data_processing_output_bucket_arn
     buffer_size = 128
     buffer_interval = 900
     prefix = "${var.events_data}/year=!{partitionKeyFromLambda:year}/month=!{partitionKeyFromLambda:month}/day=!{partitionKeyFromLambda:day}/hour=!{partitionKeyFromLambda:hour}/"
@@ -57,44 +57,6 @@ resource "aws_kinesis_firehose_delivery_stream" "extended_s3_stream" {
   }
 }
 
-resource "aws_s3_bucket" "bucket" {
-  bucket = var.data_processing_output_bucket
-  versioning {
-    enabled = true
-  }
-  server_side_encryption_configuration {
-    rule {
-      apply_server_side_encryption_by_default {
-        sse_algorithm = "AES256"
-      }
-    }
-  }
-}
-
-resource "aws_s3_bucket_policy" "bucket_policy" {
-  bucket = aws_s3_bucket.bucket.id
-
-  policy = <<EOF
-{
-  "Statement": [
-    {
-      "Effect": "Deny",
-      "Action": "s3:*",
-      "Principal": "*",
-      "Resource": [
-        "${aws_s3_bucket.bucket.arn}",
-        "${aws_s3_bucket.bucket.arn}/*"
-      ],
-      "Condition": {
-        "Bool": { "aws:SecureTransport": false }
-      }
-    }
-  ]
-}
-EOF
-}
-
 resource "aws_iam_role" "firehose_role" {
   name = "cb-data-ingestion-firehose-role${var.tag_postfix}"
 

fbpcs/infra/cloud_bridge/data_ingestion/output.tf

Lines changed: 0 additions & 10 deletions
@@ -1,13 +1,3 @@
-output "data_processing_output_bucket_id" {
-  value = aws_s3_bucket.bucket.id
-  description = "The id of S3 bucked used to store data processing outputs"
-}
-
-output "data_processing_output_bucket_arn" {
-  value = aws_s3_bucket.bucket.arn
-  description = "The arn of S3 bucked used to store data processing outputs"
-}
-
 output "firehose_stream_name" {
   value = aws_kinesis_firehose_delivery_stream.extended_s3_stream.name
   description = "The Kinesis firehose stream name"

fbpcs/infra/cloud_bridge/data_ingestion/variable.tf

Lines changed: 5 additions & 0 deletions
@@ -8,6 +8,11 @@ variable "data_processing_output_bucket" {
   default = ""
 }
 
+variable "data_processing_output_bucket_arn" {
+  description = "Amazon resource name of the data bucket"
+  default = ""
+}
+
 variable "data_ingestion_lambda_name" {
   description = "The data ingestion Lambda function name"
   default = ""
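Taken together, the module no longer creates or exports the bucket: the caller creates it up front and passes in both the name and the ARN. A minimal Bash sketch of the caller-side wiring, mirroring the deploy.sh changes below (the bucket name here is a hypothetical example; deploy.sh derives the real one from user input plus a tag postfix):

s3_bucket_data_pipeline="fb-pc-data-example"  # hypothetical example name
# An S3 bucket ARN is always "arn:aws:s3:::<bucket-name>", so it can be derived locally.
data_bucket_arn="arn:aws:s3:::${s3_bucket_data_pipeline}"

terraform apply \
  -var "data_processing_output_bucket=$s3_bucket_data_pipeline" \
  -var "data_processing_output_bucket_arn=$data_bucket_arn"

(Other -var flags omitted; see the full invocation in the deploy.sh diff below.)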

fbpcs/infra/cloud_bridge/deploy.sh

Lines changed: 9 additions & 6 deletions
@@ -198,8 +198,11 @@ deploy_aws_resources() {
   #clean up previously generated resources if any
   cleanup_generated_resources
   # Create the S3 bucket (to store config files) if it doesn't exist
-  log_streaming_data "creating s3 bucket, if it does not exist"
+  log_streaming_data "creating s3 config bucket, if it does not exist"
   validate_or_create_s3_bucket "$s3_bucket_for_storage" "$region" "$aws_account_id"
+  # Create the S3 data bucket if it doesn't exist
+  log_streaming_data "creating s3 data bucket, if it does not exist"
+  validate_or_create_s3_bucket "$s3_bucket_data_pipeline" "$region" "$aws_account_id"
   # Deploy PCE Terraform scripts
   onedocker_ecs_container_image='539290649537.dkr.ecr.us-west-2.amazonaws.com/one-docker-prod:latest'
   publisher_vpc_cidr='10.0.0.0/16'
@@ -291,15 +294,14 @@ deploy_aws_resources() {
     -var "tag_postfix=$tag_postfix" \
     -var "aws_account_id=$aws_account_id" \
     -var "data_processing_output_bucket=$s3_bucket_data_pipeline" \
+    -var "data_processing_output_bucket_arn=$data_bucket_arn" \
     -var "data_ingestion_lambda_name=$data_ingestion_lambda_name" \
     -var "data_processing_lambda_s3_bucket=$s3_bucket_for_storage" \
     -var "data_processing_lambda_s3_key=lambda.zip" \
     -var "data_upload_key_path=$data_upload_key_path" \
     -var "query_results_key_path=$query_results_key_path"
   echo "######################## Deploy Data Ingestion Terraform scripts completed ########################"
   # store the outputs from data ingestion pipeline output into variables
-  app_data_input_bucket_id=$(terraform output data_processing_output_bucket_id | tr -d '"')
-  app_data_input_bucket_arn=$(terraform output data_processing_output_bucket_arn | tr -d '"')
   firehose_stream_name=$(terraform output firehose_stream_name | tr -d '"')
 
   if "$build_semi_automated_data_pipeline"
@@ -312,7 +314,7 @@ deploy_aws_resources() {
     cp template/lambda_trigger.py .
     echo "Updating trigger function configurations..."
     sed -i "s/glueJobName = \"TO_BE_UPDATED_DURING_DEPLOYMENT\"/glueJobName = \"glue-ETL$tag_postfix\"/g" lambda_trigger.py
-    sed -i "s~s3_write_path = \"TO_BE_UPDATED_DURING_DEPLOYMENT\"~s3_write_path = \"$app_data_input_bucket_id/events_data/\"~g" lambda_trigger.py
+    sed -i "s~s3_write_path = \"TO_BE_UPDATED_DURING_DEPLOYMENT\"~s3_write_path = \"$s3_bucket_data_pipeline/events_data/\"~g" lambda_trigger.py
 
     echo "######################## Initializing terraform working directory started ########################"
     terraform init -reconfigure \
@@ -328,8 +330,8 @@ deploy_aws_resources() {
       -var "aws_account_id=$aws_account_id" \
       -var "lambda_trigger_s3_key=lambda_trigger.zip" \
       -var "app_data_input_bucket=$s3_bucket_data_pipeline" \
-      -var "app_data_input_bucket_id=$app_data_input_bucket_id" \
-      -var "app_data_input_bucket_arn=$app_data_input_bucket_arn" \
+      -var "app_data_input_bucket_id=$s3_bucket_data_pipeline" \
+      -var "app_data_input_bucket_arn=$data_bucket_arn" \
       -var "data_upload_key_path=$data_upload_key_path"
     echo "######################## Deploy Semi-automated Data Ingestion Terraform scripts completed ########################"
   fi
@@ -406,6 +408,7 @@ else
   s3_bucket_data_pipeline="$s3_bucket_data_pipeline$tag_postfix"
 fi
 
+data_bucket_arn="arn:aws:s3:::${s3_bucket_data_pipeline}"
 policy_name="fb-pc-policy${tag_postfix}"
 database_name="mpc-events-db${tag_postfix}"
 glue_crawler_name="mpc-events-crawler${tag_postfix}"
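validate_or_create_s3_bucket is an existing helper in util.sh whose body is not part of this diff; a rough sketch of the create-or-validate behavior the deploy script now relies on might look like this (an assumption for illustration, not the actual helper):

validate_or_create_s3_bucket() {
  # Hypothetical sketch; the real util.sh helper may differ.
  local bucket="$1" region="$2" account_id="$3"
  # head-bucket exits non-zero when the bucket is missing or owned by someone else.
  if aws s3api head-bucket --bucket "$bucket" \
      --expected-bucket-owner "$account_id" 2>/dev/null
  then
    echo "Bucket $bucket already exists under account $account_id; reusing it."
  else
    echo "Creating bucket $bucket in $region..."
    # Note: us-east-1 rejects an explicit LocationConstraint; this sketch assumes another region.
    aws s3api create-bucket --bucket "$bucket" --region "$region" \
      --create-bucket-configuration "LocationConstraint=$region"
  fi
}

Because the util.sh check removed below no longer rejects a pre-existing data bucket, this create-or-reuse behavior is what makes the new deploy.sh call safe to run repeatedly.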

fbpcs/infra/cloud_bridge/util.sh

Lines changed: 0 additions & 11 deletions
@@ -189,17 +189,6 @@ input_validation () {
   echo "The S3 bucket for storing processed data is $s3_bucket_data_pipeline".
   validate_bucket_name "$s3_bucket_data_pipeline"
 
-  if ! "$undeploy"
-  then
-    echo "making sure $s3_bucket_data_pipeline is not an existing bucket..."
-    if aws s3api head-bucket --bucket "$s3_bucket_data_pipeline" --expected-bucket-owner "$aws_account_id" 2>&1 | grep -q "404" # bucekt doesn't exist
-    then
-      echo "The bucket $s3_bucket_data_pipeline doesn't exist. Continue..."
-    else # bucket exists, we want the data-storage bucket to be new
-      echo "The bucket $s3_bucket_data_pipeline already exists under Account $aws_account_id. Please choose another bucket name."
-      exit 1
-    fi
-  fi
   echo "validate input: aws account id..."
   echo "Your AWS acount ID is $aws_account_id"
   account_A=$(aws sts get-caller-identity |grep -o 'Account":.*' | tr -d '"' | tr -d ' ' | tr -d ',' | cut -d':' -f2)
