-
Notifications
You must be signed in to change notification settings - Fork 6
feat: experimental Glean event #262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Plan Result (prospect-translation-lambda-cdk-production)
Change Result (Click me) # aws_lambda_function.translation-lambda_translation-sqs-lambda_B9BDF6BA will be updated in-place
~ resource "aws_lambda_function" "translation-lambda_translation-sqs-lambda_B9BDF6BA" {
id = "ProspectAPI-Prod-Sqs-Translation-Function"
tags = {
"app_code" = "content"
"component_code" = "content-prospectapi"
"env_code" = "prod"
"environment" = "Prod"
"service" = "ProspectAPI-Sqs-Translation"
}
# (22 unchanged attributes hidden)
~ environment {
~ variables = {
~ "GIT_SHA" = (sensitive value)
# (4 unchanged elements hidden)
}
}
# (4 unchanged blocks hidden)
}
Plan: 0 to add, 1 to change, 0 to destroy.
|
Plan Result (prospect-api-cdk-production)
Change Result (Click me) # data.aws_iam_policy_document.application_ecs_service_ecs-iam_data-ecs-task-role-policy_090CC3AD will be read during apply
# (depends on a resource or a module with changes pending)
<= data "aws_iam_policy_document" "application_ecs_service_ecs-iam_data-ecs-task-role-policy_090CC3AD" {
+ id = (known after apply)
+ json = (known after apply)
+ minified_json = (known after apply)
+ version = "2012-10-17"
+ statement {
+ actions = [
+ "dynamodb:BatchGet*",
+ "dynamodb:DescribeTable",
+ "dynamodb:Get*",
+ "dynamodb:Query",
+ "dynamodb:Scan",
+ "dynamodb:UpdateItem",
]
+ effect = "Allow"
+ resources = [
+ "arn:aws:dynamodb:us-east-1:996905175585:table/PROAPI-Prod-Prospects",
+ "arn:aws:dynamodb:us-east-1:996905175585:table/PROAPI-Prod-Prospects/*",
]
}
+ statement {
+ actions = [
+ "s3:*",
]
+ effect = "Allow"
+ resources = [
+ "arn:aws:s3:::pocket-prospectapi-prod-images",
+ "arn:aws:s3:::pocket-prospectapi-prod-images/*",
]
}
+ statement {
+ actions = [
+ "events:PutEvents",
]
+ effect = "Allow"
+ resources = [
+ "arn:aws:events:us-east-1:996905175585:event-bus/PocketEventBridge-Prod-Shared-Event-Bus",
]
}
+ statement {
+ actions = [
+ "logs:CreateLogGroup",
+ "logs:CreateLogStream",
+ "logs:DescribeLogGroups",
+ "logs:DescribeLogStreams",
+ "logs:PutLogEvents",
]
+ effect = "Allow"
+ resources = [
+ "*",
]
}
}
# aws_dynamodb_table.dynamodb_prospects_dynamodb_table_9854E41E will be updated in-place
~ resource "aws_dynamodb_table" "dynamodb_prospects_dynamodb_table_9854E41E" {
id = "PROAPI-Prod-Prospects"
name = "PROAPI-Prod-Prospects"
tags = {
"app_code" = "content"
"component_code" = "content-prospectapi"
"env_code" = "prod"
"environment" = "Prod"
"service" = "ProspectAPI"
}
# (9 unchanged attributes hidden)
- global_secondary_index {
- hash_key = "scheduledSurfaceGuid" -> null
- name = "scheduledSurfaceGuid-prospectType" -> null
- non_key_attributes = [] -> null
- projection_type = "ALL" -> null
- range_key = "prospectType" -> null
- read_capacity = 0 -> null
- write_capacity = 0 -> null
}
+ global_secondary_index {
+ hash_key = "scheduledSurfaceGuid"
+ name = "scheduledSurfaceGuid-prospectType"
+ non_key_attributes = []
+ projection_type = "ALL"
+ range_key = "prospectType"
+ read_capacity = 5
+ write_capacity = 5
}
# (5 unchanged blocks hidden)
}
# aws_iam_policy.application_ecs_service_ecs-iam_ecs-task-role-policy_6FC89FB6 will be updated in-place
~ resource "aws_iam_policy" "application_ecs_service_ecs-iam_ecs-task-role-policy_6FC89FB6" {
id = "arn:aws:iam::996905175585:policy/ProspectAPI-Prod-TaskRolePolicy"
name = "ProspectAPI-Prod-TaskRolePolicy"
~ policy = jsonencode(
{
- Statement = [
- {
- Action = [
- "dynamodb:UpdateItem",
- "dynamodb:Scan",
- "dynamodb:Query",
- "dynamodb:Get*",
- "dynamodb:DescribeTable",
- "dynamodb:BatchGet*",
]
- Effect = "Allow"
- Resource = [
- "arn:aws:dynamodb:us-east-1:996905175585:table/PROAPI-Prod-Prospects/*",
- "arn:aws:dynamodb:us-east-1:996905175585:table/PROAPI-Prod-Prospects",
]
},
- {
- Action = "s3:*"
- Effect = "Allow"
- Resource = [
- "arn:aws:s3:::pocket-prospectapi-prod-images/*",
- "arn:aws:s3:::pocket-prospectapi-prod-images",
]
},
- {
- Action = "events:PutEvents"
- Effect = "Allow"
- Resource = "arn:aws:events:us-east-1:996905175585:event-bus/PocketEventBridge-Prod-Shared-Event-Bus"
},
- {
- Action = [
- "logs:PutLogEvents",
- "logs:DescribeLogStreams",
- "logs:DescribeLogGroups",
- "logs:CreateLogStream",
- "logs:CreateLogGroup",
]
- Effect = "Allow"
- Resource = "*"
},
]
- Version = "2012-10-17"
}
) -> (known after apply)
tags = {
"app_code" = "content"
"component_code" = "content-prospectapi"
"env_code" = "prod"
"environment" = "Prod"
"service" = "ProspectAPI"
}
# (5 unchanged attributes hidden)
}
Plan: 0 to add, 2 to change, 0 to destroy. |
Plan Result (corpus-scheduler-lambda-cdk-production)
Change Result (Click me) # aws_lambda_function.corpus-scheduler-sqs-lambda_F2ECDF9F will be updated in-place
~ resource "aws_lambda_function" "corpus-scheduler-sqs-lambda_F2ECDF9F" {
id = "CorpusSchedulerLambda-Prod-SQS-Function"
~ qualified_arn = "arn:aws:lambda:us-east-1:996905175585:function:CorpusSchedulerLambda-Prod-SQS-Function:207" -> (known after apply)
~ qualified_invoke_arn = "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:996905175585:function:CorpusSchedulerLambda-Prod-SQS-Function:207/invocations" -> (known after apply)
tags = {
"app_code" = "content"
"component_code" = "content-corpusschedulerlambda"
"env_code" = "prod"
"environment" = "Prod"
"service" = "CorpusSchedulerLambda"
}
~ version = "207" -> (known after apply)
# (20 unchanged attributes hidden)
~ environment {
~ variables = {
~ "GIT_SHA" = (sensitive value)
# (7 unchanged elements hidden)
}
}
# (4 unchanged blocks hidden)
}
Plan: 0 to add, 1 to change, 0 to destroy.
|
experimental_json: '', | ||
}; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To improve readability, I'd split this up in separate functions for approved and rejected extra data.
5a49af2
to
4750b4c
Compare
description: "Curator who last updated the item." | ||
authors_json: | ||
type: string | ||
description: "JSON-encoded list of authors." |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gkatre Glean only supports string and boolean for server-side events. Is JSON-encoding a good way to store a list of authors for processing in BigQuery-ETL?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, we don't support JSON object metrics in server templates yet, but string should be possible to parse in BQ.
description: "Curator who created the item." | ||
updated_at: | ||
type: string | ||
description: "Unix timestamp (in seconds) for last item update." |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Glean events support 3 types:
type: The type of value this extra key can hold. One of string, boolean, quantity. Defaults to string. Recorded value is converted to string for transmission. Note: If not specified only the legacy API on record is available.
However, currently quantity
doesn't seem to be supported by the Glean parser for server-side events.
@gkatre @akkomar Is string
or quantity
better for timestamps? If quantity
is preferred, I can see if it's feasible to support it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's an extra key which gets converted to a string for transport anyway (see https://mozilla.github.io/glean/book/reference/metrics/event.html#events).
I think in order to support quantity we'd need to have proper type conversion in https://github.com/mozilla/glean_parser/blob/main/glean_parser/templates/javascript_server.jinja2#L227 via https://github.com/mozilla/glean_parser/blob/main/glean_parser/javascript_server.py#L63-L64 like in Go parser. I can look into this sometime next week, or feel free take a stab if you want and have time.
Goal
In support of reporting and ML HNT-414, we need to have corpus metadata available in BigQuery. This PR emits Glean events for corpus items, corresponding to the reviewed_corpus_item Snowplow event.
Deployment steps
Data review
References
JIRA ticket: MC-1661
Documentation:
Slack threads:
QA
Glean event:
payload from the above event