From 48b1f4db51ee4182759fbc9dde088fa531fa19bd Mon Sep 17 00:00:00 2001 From: Ankit Siva Date: Tue, 30 Jul 2024 15:17:09 -0700 Subject: [PATCH 1/7] feat: add user_id to event.schema --- schema/1.0.0/event.schema.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/schema/1.0.0/event.schema.json b/schema/1.0.0/event.schema.json index e6e89cb..16788bf 100644 --- a/schema/1.0.0/event.schema.json +++ b/schema/1.0.0/event.schema.json @@ -51,6 +51,12 @@ "maxLength": 100, "examples": ["5e3b2a1c-8b7d-4f2e-a3d4-c9b2e1f3a4b5","quepid-nightly-bot", "BugsBunny::Firefox@0967084"] }, + "user_id": { + "description": "The user ID associated with the person performing the interactions being logged on the site. Can be omitted or empty in case of an unauthenticated user.", + "type": "string", + "maxLength": 100, + "examples": ["5e3b2a1c-8b7d-4f2e-a3d4-c9b2e1f3a4b5"] + }, "timestamp": { "description": "When the event took place.", "type": "string", From c6d0e48f694daaad387d2d810e775bd54c51ae07 Mon Sep 17 00:00:00 2001 From: Ankit Siva Date: Wed, 7 Aug 2024 09:08:56 -0700 Subject: [PATCH 2/7] feat[event.schema.json]: add session_id --- schema/1.0.0/event.schema.json | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/schema/1.0.0/event.schema.json b/schema/1.0.0/event.schema.json index e6e89cb..10749a5 100644 --- a/schema/1.0.0/event.schema.json +++ b/schema/1.0.0/event.schema.json @@ -4,7 +4,7 @@ "title": "Event tracking for UBI", "description": "Version 1.0.0; last updated 2024-06-14. An event that occurred, typically in response to a user.", "type": "object", - "required": ["action_name", "query_id", "timestamp"], + "required": ["action_name", "query_id", "session_id", "timestamp"], "properties": { "application": { "description": "name of the application tracking UBI events.", @@ -45,6 +45,12 @@ } ] }, + "session_id": { + "description": "The session of the user creating the interactions. 
This allows us to correlate the interactions with the other events created by a service that recognizes session IDs. Can be used to track unique visits for authenticated and anonymous users.", + "type": "string", + "maxLength": 100, + "examples": ["84266fdbd31d4c2c6d0665f7e8380fa3"] + }, "client_id": { "description": "The client issuing the query. This could be a unique browser, a microservice that performs searches, a crawling bot. If only authenticated users are tracked, then you could use a specific user id here, otherwise you should use something permanent and track user id as an _Additional Property_.", "type": "string", From b633bdd2763598aa698440e642428f7e92e9ae14 Mon Sep 17 00:00:00 2001 From: Ankit Siva Date: Wed, 7 Aug 2024 09:16:34 -0700 Subject: [PATCH 3/7] fix: remove user_id documentation from client_id The client_id field had some lingering documentation about where user_ids could go. This commit cleans that up. --- schema/1.0.0/event.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/1.0.0/event.schema.json b/schema/1.0.0/event.schema.json index 16788bf..ce1536b 100644 --- a/schema/1.0.0/event.schema.json +++ b/schema/1.0.0/event.schema.json @@ -46,7 +46,7 @@ ] }, "client_id": { - "description": "The client issuing the query. This could be a unique browser, a microservice that performs searches, a crawling bot. If only authenticated users are tracked, then you could use a specific user id here, otherwise you should use something permanent and track user id as an _Additional Property_.", + "description": "The client issuing the query. 
This could be a unique browser, a microservice that performs searches, a crawling bot.", "type": "string", "maxLength": 100, "examples": ["5e3b2a1c-8b7d-4f2e-a3d4-c9b2e1f3a4b5","quepid-nightly-bot", "BugsBunny::Firefox@0967084"] From 8d253ae7c46aca370f0fa4ab3bd3c90a71b7dd1d Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 15 Aug 2024 17:21:33 -0400 Subject: [PATCH 4/7] add in the Panama Canal diagram --- README.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f369c6c..88f2608 100644 --- a/README.md +++ b/README.md @@ -105,8 +105,32 @@ In post processing, you can use the Client ID field to connect queries and event If your user identification is stable, then feel free to use the [Query Request --> Client ID](https://o19s.github.io/ubi/docs/html/query.request.schema.html#client_id) and [Event --> Client ID](https://o19s.github.io/ubi/docs/html/event.schema.html#client_id). Otherwise, see the above FAQ entry for how to handle it. The item ID is tracked for an event in the [Event --> Object](https://o19s.github.io/ubi/docs/html/event.schema.html#event_attributes_object) datastructure. +#### How can I correlate sensitive data? + +We often have sensitive data that is returned as part of the search process that changes quickly, and we would not want to expose to the front end. + +For example, in ecommerce, we might want to track the margin that is earned on a product, but not pass that data back to the browser, just to collect it later int he events. 
+ +To do that, we introduce a cache into our architecture for this sensitive data: + +```mermaid +sequenceDiagram + actor Alice + Alice ->> Browser: "I want a mobile phone" + Browser ->> API: "{user_search_query:mobile phone}" + API ->> SearchEngine: "q=mobile phone" + SearchEngine ->> UBI_db: Store queryId and SKUs of phones returned to user + SearchEngine->>API: Return QueryId and list of mobile phones by SKU with price and profit margin + API ->> Cache: Store Margins per Phone under QueryId and SKU + API->> Browser: QueryID and list of mobile phones by SKU with price + Alice->> Browser: "Click iPhone 15 Pro" + Browser->> API: Click Event with SKU and QueryId + API->> Cache: Look up Margin based on QueryId and SKU + API->> UBI_db: Store queryId and SKU and Margin +``` - +Another common reason is to have rich events, but reduce the volume of data passed over the wire to the client. + ### 🏫 Learn More From 87c2edc25af5bb4fcbf529eeb4477b89919c7f8c Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 15 Aug 2024 17:22:25 -0400 Subject: [PATCH 5/7] add name --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 88f2608..d7141ae 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,8 @@ sequenceDiagram ``` Another common reason is to have rich events, but reduce the volume of data passed over the wire to the client. + +We sometimes refer to this shortcut architecture as "the Panama Canal", as in taking an extreme shortcut! 
From 3fe24658f6d8af0e1d2d9733a5b54334084da774 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 15 Aug 2024 17:29:09 -0400 Subject: [PATCH 6/7] text --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d7141ae..7e015cb 100644 --- a/README.md +++ b/README.md @@ -105,11 +105,11 @@ In post processing, you can use the Client ID field to connect queries and event If your user identification is stable, then feel free to use the [Query Request --> Client ID](https://o19s.github.io/ubi/docs/html/query.request.schema.html#client_id) and [Event --> Client ID](https://o19s.github.io/ubi/docs/html/event.schema.html#client_id). Otherwise, see the above FAQ entry for how to handle it. The item ID is tracked for an event in the [Event --> Object](https://o19s.github.io/ubi/docs/html/event.schema.html#event_attributes_object) datastructure. -#### How can I correlate sensitive data? +#### How can I correlate private sensitive data with public event tracking? -We often have sensitive data that is returned as part of the search process that changes quickly, and we would not want to expose to the front end. +We often have sensitive data that is returned as part of the search process that changes quickly, and we would not want to expose that information in the front end, even hidden! -For example, in ecommerce, we might want to track the margin that is earned on a product, but not pass that data back to the browser, just to collect it later int he events. +For example, in ecommerce, we might want to track the margin that is earned on a product, but not pass that data back to the browser, just to collect it later in the events. 
To do that, we introduce a cache into our architecture for this sensitive data: From d73ac3376d2773c11a72f7d7b142bb1f09ffeb77 Mon Sep 17 00:00:00 2001 From: Mike Robins Date: Wed, 21 Aug 2024 11:47:06 +1000 Subject: [PATCH 7/7] Adds impression to action enum --- schema/1.0.0/event.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/1.0.0/event.schema.json b/schema/1.0.0/event.schema.json index e6e89cb..89fce12 100644 --- a/schema/1.0.0/event.schema.json +++ b/schema/1.0.0/event.schema.json @@ -22,7 +22,7 @@ { "type": "string", "maxLength": 100, - "enum": ["click_through", "add_to_cart", "click", "watch", "view", "purchase"] + "enum": ["click_through", "add_to_cart", "click", "watch", "view", "purchase", "impression"] }, { "type": "string",