Skip to content

Commit

Permalink
Merge pull request #10 from snowplow-incubator/enrichments
Browse files Browse the repository at this point in the history
Add support for self-service enrichments
  • Loading branch information
miike authored Feb 10, 2025
2 parents 2c0e039 + 54f1946 commit 0798be0
Show file tree
Hide file tree
Showing 23 changed files with 395 additions and 47 deletions.
27 changes: 0 additions & 27 deletions app/app.py

This file was deleted.

2 changes: 0 additions & 2 deletions app/requirements.txt

This file was deleted.

4 changes: 2 additions & 2 deletions app/Dockerfile → control-plane/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM python:3.12
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
COPY app.py app.py
98 changes: 98 additions & 0 deletions control-plane/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from base64 import b64decode, b64encode
from glob import glob
import json
import logging

import docker
import requests
from flask import Flask, request
from flask_cors import CORS


app = Flask(__name__)
# NOTE(review): CORS(app) is called with no options; flask-cors presumably
# then accepts requests from any origin. This API can restart containers, so
# confirm that is intended outside local development.
CORS(app)
# Use the public setLevel() API instead of assigning the `level` attribute,
# so the logging module can keep its internal state consistent.
logging.getLogger('flask_cors').setLevel(logging.DEBUG)

# Allow-list of container names the container endpoints will act on.
valid_containers = [
    'snowplow-stream-collector',
    'snowplow-enrich',
    'snowplow-iglu-server',
    'connect',
    'snowflake-streaming-loader',
    'snowflake-streaming-loader-incomplete',
    'lake-loader',
    'bigquery-loader',
    'snowbridge',
    'ngrok-tunnel',
]

# Docker client configured from the process environment.
client = docker.from_env()

# Directory holding one JSON file per enrichment. The path is relative, so it
# is resolved against the working directory the app is started from.
PATH = '../enrich/enrichments'
## Container API

def restart(name: str):
    """Restart an allow-listed Docker container.

    Args:
        name: Container name; it must appear in ``valid_containers``.

    Returns:
        A dict that always carries a boolean ``'restart'`` key:
        ``{'restart': True}`` on success,
        ``{'restart': False, 'error': ...}`` when the name is not allowed, and
        ``{'restart': False, 'exception': ...}`` when the restart raised.
    """
    try:
        if name not in valid_containers:
            return {'restart': False, 'error': 'Invalid container name'}
        client.containers.get(name).restart()
        return {'restart': True}
    except Exception as e:
        # Report the failure under the same 'restart' key as the other
        # outcomes (this path previously used 'restarted'), so callers can
        # check a single field.
        return {'restart': False, 'exception': str(e)}

@app.route('/containers/restart')
def restart_container():
    """Restart the container named by the ``container`` query parameter.

    Delegates to ``restart`` and returns its result dict unchanged.
    """
    return restart(request.args.get('container'))

@app.route('/containers/start')
def start_container():
    """Start the container named by the ``container`` query parameter.

    Returns ``{'start': True}`` on success, ``{'start': False, 'error': ...}``
    when the name is not in ``valid_containers``, and
    ``{'start': False, 'exception': ...}`` when starting raised.
    """
    requested = request.args.get('container')
    try:
        # Guard clause: reject anything outside the allow-list first.
        if requested not in valid_containers:
            return {'start': False, 'error': 'Invalid container name'}
        target = client.containers.get(requested)
        target.start()
        return {'start': True}
    except Exception as err:
        return {'start': False, 'exception': str(err)}

@app.route('/containers/list')
def list_containers():
    """Return name and status for each listed container in the allow-list.

    NOTE(review): ``client.containers.list()`` is called without arguments;
    the Docker SDK presumably returns only running containers by default, so
    confirm that omitting stopped containers is intended.
    """
    summary = []
    for item in client.containers.list():
        # Skip containers that are not part of the allow-list.
        if item.name in valid_containers:
            summary.append({'name': item.name, 'status': item.status})
    return summary

@app.route('/containers/status')
def container_status():
    """Report the status of the container named by ``container``.

    Returns ``{'status': <docker status>}``, or
    ``{'status': 'error', 'exception': ...}`` when the lookup raised.

    NOTE(review): unlike the restart/start endpoints, this one does not check
    ``valid_containers`` before querying Docker. Confirm that is intended.
    """
    requested = request.args.get('container')
    try:
        current = client.containers.get(requested).status
    except Exception as err:
        return {'status': 'error', 'exception': str(err)}
    return {'status': current}


## Enrichment API

## List enrichments
@app.route('/pipelines/enrichments/', methods=['GET'])
def read_enrichments():
    """Return the parsed contents of every ``*.json`` file under ``PATH``.

    Returns:
        A list with one parsed JSON document per enrichment file, ordered by
        file name.
    """
    enrichments = []
    # glob() yields files in arbitrary order; sort so the response is stable
    # from one request to the next.
    for filename in sorted(glob(f'{PATH}/*.json')):
        # Log at debug level instead of printing on every request.
        logging.getLogger(__name__).debug('Reading enrichment %s', filename)
        # Read as UTF-8 explicitly rather than the platform default encoding.
        with open(filename, 'r', encoding='utf-8') as file:
            enrichments.append(json.load(file))
    return enrichments

## Write single enrichment
@app.route('/pipelines/enrichments/', methods=['POST'])
def write_enrichment():
    """Persist one enrichment configuration and restart the enrich container.

    The JSON request body is written to ``{PATH}/{data.name}.json``. For the
    ``javascript_script_config`` enrichment, ``data.parameters.script`` is
    base64-encoded before it is written.

    Returns:
        ``{'success': True, 'restart': <bool>}`` once the file is written,
        where ``restart`` reports whether the enrich container restarted; or
        ``({'success': False, 'error': ...}, 400)`` when the body is malformed
        or the enrichment name is not a safe file name.

    NOTE(review): the target file is derived from ``data.name``, which may
    differ from the file the config was read from (for example a file named
    ``cross_navigation_enrichment.json`` whose name is
    ``cross_navigation_config``) — confirm that duplicates are acceptable.
    """
    data = request.json
    try:
        config = data['data']
        enrichment_name = config['name']
    except (KeyError, TypeError):
        return {'success': False, 'error': 'Missing data.name'}, 400

    # The name comes from the client and is interpolated into a file path, so
    # accept only ASCII letters, digits, '_' and '-'. This rejects path
    # separators and '..' (path traversal).
    if not (
        isinstance(enrichment_name, str)
        and enrichment_name.isascii()
        and enrichment_name.replace('_', '').replace('-', '').isalnum()
    ):
        return {'success': False, 'error': 'Invalid enrichment name'}, 400

    if enrichment_name == 'javascript_script_config':
        try:
            script = config['parameters']['script']
        except (KeyError, TypeError):
            return {'success': False, 'error': 'Missing data.parameters.script'}, 400
        if not isinstance(script, str):
            return {'success': False, 'error': 'Script must be a string'}, 400
        config['parameters']['script'] = b64encode(script.encode()).decode('utf-8')

    # Serialize before opening, so a serialization failure cannot leave a
    # truncated file behind.
    payload = json.dumps(data, indent=4)
    with open(f'{PATH}/{enrichment_name}.json', 'w', encoding='utf-8') as file:
        file.write(payload)

    # Report the restart outcome instead of discarding it; .get() treats any
    # result without a 'restart' key as a failure.
    restart_result = restart('snowplow-enrich')
    return {'success': True, 'restart': bool(restart_result.get('restart', False))}


3 changes: 3 additions & 0 deletions control-plane/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
flask==3.0.3
flask-cors==5.0.0
docker==7.1.0
17 changes: 15 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ services:

local-failed-ui:
container_name: "local-failed-ui"
image: "snowplow/failed-events-ui:0.0.1"
image: "snowplow/failed-events-ui:0.0.2"
ports:
- "3001:3000"

Expand All @@ -305,4 +305,17 @@ services:
- "./tunnel/ngrok.yml:/etc/ngrok.yml"
- "./tunnel/policy.yml:/etc/policy.yml"
ports:
- "4040:4040"
- "4040:4040"

control-plane:
container_name: "snowplow-control-plane"
build: "control-plane/"
command: "flask run --host=0.0.0.0 --debug"
environment:
- "FLASK_APP=app"
volumes:
- "./enrich:/enrich"
- "/var/run/docker.sock:/var/run/docker.sock"
ports:
- "8083:5000"

11 changes: 11 additions & 0 deletions enrich/enrichments/anon_ip.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow/anon_ip/jsonschema/1-0-0",
"data": {
"name": "anon_ip",
"vendor": "com.snowplowanalytics.snowplow",
"enabled": false,
"parameters": {
"anonOctets": 1
}
}
}
43 changes: 43 additions & 0 deletions enrich/enrichments/api_request_enrichment_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow.enrichments/api_request_enrichment_config/jsonschema/1-0-0",
"data": {
"name": "api_request_enrichment_config",
"vendor": "com.snowplowanalytics.snowplow.enrichments",
"enabled": false,
"parameters": {
"inputs": [
{
"key": "user",
"pojo": {
"field": "user_id"
}
}
],
"api": {
"http": {
"method": "GET",
"uri": "http://api.acme.com/users/{{client}}/{{user}}?format=json",
"timeout": 5000,
"authentication": {
"httpBasic": {
"username": "NA",
"password": "NA"
}
}
}
},
"outputs": [
{
"schema": "iglu:com.acme/user/jsonschema/1-0-0",
"json": {
"jsonPath": "$.record"
}
}
],
"cache": {
"size": 3000,
"ttl": 60
}
}
}
}
18 changes: 18 additions & 0 deletions enrich/enrichments/campaign_attribution.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow/campaign_attribution/jsonschema/1-0-0",
"data": {
"name": "campaign_attribution",
"vendor": "com.snowplowanalytics.snowplow",
"enabled": true,
"parameters": {
"mapping": "static",
"fields": {
"mktMedium": ["utm_medium"],
"mktSource": ["utm_source"],
"mktTerm": ["utm_term"],
"mktContent": ["utm_content"],
"mktCampaign": ["utm_campaign"]
}
}
}
}
11 changes: 11 additions & 0 deletions enrich/enrichments/cookie_extractor_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow/cookie_extractor_config/jsonschema/1-0-0",
"data": {
"name": "cookie_extractor_config",
"vendor": "com.snowplowanalytics.snowplow",
"enabled": false,
"parameters": {
"cookies": []
}
}
}
8 changes: 8 additions & 0 deletions enrich/enrichments/cross_navigation_enrichment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow.enrichments/cross_navigation_config/jsonschema/1-0-0",
"data": {
"enabled": false,
"vendor": "com.snowplowanalytics.snowplow.enrichments",
"name": "cross_navigation_config"
}
}
14 changes: 14 additions & 0 deletions enrich/enrichments/currency_conversion_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow/currency_conversion_config/jsonschema/1-0-0",
"data": {
"enabled": false,
"vendor": "com.snowplowanalytics.snowplow",
"name": "currency_conversion_config",
"parameters": {
"accountType": "DEVELOPER",
"apiKey": "PLACEHOLDER",
"baseCurrency": "EUR",
"rateAt": "EOD_PRIOR"
}
}
}
17 changes: 17 additions & 0 deletions enrich/enrichments/event_fingerprint_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"data": {
"enabled": true,
"name": "event_fingerprint_config",
"parameters": {
"excludeParameters": [
"cv",
"eid",
"nuid",
"stm"
],
"hashAlgorithm": "SHA1"
},
"vendor": "com.snowplowanalytics.snowplow"
},
"schema": "iglu:com.snowplowanalytics.snowplow/event_fingerprint_config/jsonschema/1-0-1"
}
11 changes: 11 additions & 0 deletions enrich/enrichments/http_header_extractor_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow.enrichments/http_header_extractor_config/jsonschema/1-0-0",
"data": {
"name": "http_header_extractor_config",
"vendor": "com.snowplowanalytics.snowplow.enrichments",
"enabled": false,
"parameters": {
"headersPattern": ".*"
}
}
}
22 changes: 22 additions & 0 deletions enrich/enrichments/iab_spiders_and_robots_enrichment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow.enrichments/iab_spiders_and_robots_enrichment/jsonschema/1-0-0",
"data": {
"name": "iab_spiders_and_robots_enrichment",
"vendor": "com.snowplowanalytics.snowplow.enrichments",
"enabled": false,
"parameters": {
"ipFile": {
"database": "ip_exclude_current_cidr.txt",
"uri": "gs://bucket-name"
},
"excludeUseragentFile": {
"database": "exclude_current.txt",
"uri": "gs://bucket-name"
},
"includeUseragentFile": {
"database": "include_current.txt",
"uri": "gs://bucket-name"
}
}
}
}
14 changes: 14 additions & 0 deletions enrich/enrichments/ip_lookups.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow/ip_lookups/jsonschema/2-0-0",
"data": {
"name": "ip_lookups",
"vendor": "com.snowplowanalytics.snowplow",
"enabled": false,
"parameters": {
"geo": {
"database": "GeoLite2-City.mmdb",
"uri": "gs://max-mind-geolite2/"
}
}
}
}
11 changes: 11 additions & 0 deletions enrich/enrichments/javascript_script_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"schema": "iglu:com.snowplowanalytics.snowplow/javascript_script_config/jsonschema/1-0-1",
"data": {
"vendor": "com.snowplowanalytics.snowplow",
"name": "javascript_script_config",
"parameters": {
"script": "="
},
"enabled": false
}
}
Loading

0 comments on commit 0798be0

Please sign in to comment.