Skip to content

Commit 8b29026

Browse files
authored
Merge pull request #33 from rehanvdm/feature/add-filters
2 parents e677f76 + 982979f commit 8b29026

File tree

17 files changed

+465
-169
lines changed

17 files changed

+465
-169
lines changed

README.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ even if it goes against "best practices".
2727

2828
## Getting started
2929

30+
> 📖 Alternatively, read a [step-by-step guide](https://dev.to/aws/deploying-a-serverless-web-analytics-solution-for-your-websites-5coh) written by Ricardo Sueiras
31+
3032
### Serverside setup
3133

3234
> ⚠️ Requires your project `aws-cdk` and `aws-cdk-lib` packages to be greater than 2.79.1
@@ -184,13 +186,13 @@ app.mount('#app');
184186

185187
The worst case projected costs are:
186188

187-
| Views | Cost($) |
188-
|-------------|----------|
189-
| 10,000 | 2.01 |
190-
| 100,000 | 3.24 |
191-
| 1,000,000 | 14.64 |
192-
| 10,000,000 | 128.74 |
193-
| 100,000,000 | 1,288.39 |
189+
| Views | Cost($) |
190+
|-------------|---------|
191+
| 10,000 | 0.52 |
192+
| 100,000 | 1.01 |
193+
| 1,000,000 | 10.18 |
194+
| 10,000,000 | 58.88 |
195+
| 100,000,000 | 550.32 |
194196

195197
## What's in the box
196198

docs/API.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ even if it goes against "best practices".
2727

2828
## Getting started
2929

30+
> 📖 Alternatively, read a [step-by-step guide](https://dev.to/aws/deploying-a-serverless-web-analytics-solution-for-your-websites-5coh) written by Ricardo Sueiras
31+
3032
### Serverside setup
3133

3234
> ⚠️ Requires your project `aws-cdk` and `aws-cdk-lib` packages to be greater than 2.79.1
@@ -184,13 +186,13 @@ app.mount('#app');
184186

185187
The worst case projected costs are:
186188

187-
| Views | Cost($) |
188-
|-------------|----------|
189-
| 10,000 | 2.01 |
190-
| 100,000 | 3.24 |
191-
| 1,000,000 | 14.64 |
192-
| 10,000,000 | 128.74 |
193-
| 100,000,000 | 1,288.39 |
189+
| Views | Cost($) |
190+
|-------------|---------|
191+
| 10,000 | 0.52 |
192+
| 100,000 | 1.01 |
193+
| 1,000,000 | 10.18 |
194+
| 10,000,000 | 58.88 |
195+
| 100,000,000 | 550.32 |
194196

195197
## What's in the box
196198

src/src/backend/api-front/routes/stats/index.ts

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { assertAuthentication, TrpcInstance } from '@backend/api-front/server';
33
import { SchemaSite } from '@backend/lib/models/site';
44
import { DateUtils } from '@backend/lib/utils/date_utils';
55
import { AthenaPageViews } from '@backend/lib/dal/athena/page_views';
6+
import { FilterSchema } from '@backend/lib/models/filter';
67

78
const GetTopLevelStatsSchema = z.object({
89
visitors: z.number(),
@@ -33,21 +34,24 @@ export function getTopLevelStats(trpcInstance: TrpcInstance) {
3334
from: z.string().datetime(),
3435
to: z.string().datetime(),
3536
sites: z.array(SchemaSite),
37+
filter: FilterSchema.optional(),
3638
})
3739
)
3840
.output(GetTopLevelStatsSchema)
3941
.query(async ({ input, ctx }) => {
4042
assertAuthentication(ctx);
4143

44+
if (input.filter && Object.keys(input.filter).length === 0) input.filter = undefined;
45+
4246
const athenaPageViews = new AthenaPageViews();
4347

4448
const fromDate = DateUtils.parseIso(input.from);
4549
const toDate = DateUtils.parseIso(input.to);
4650
const { prevStartDate, prevEndDate } = DateUtils.getPreviousPeriod(fromDate, toDate);
4751

4852
const [totals, totalsPrev] = await Promise.all([
49-
athenaPageViews.totalsForPeriod(fromDate, toDate, input.sites),
50-
athenaPageViews.totalsForPeriod(prevStartDate, prevEndDate, input.sites),
53+
athenaPageViews.totalsForPeriod(fromDate, toDate, input.sites, input.filter),
54+
athenaPageViews.totalsForPeriod(prevStartDate, prevEndDate, input.sites, input.filter),
5155
]);
5256

5357
return {
@@ -92,6 +96,7 @@ export function getPageViews(trpcInstance: TrpcInstance) {
9296
sites: z.array(SchemaSite),
9397
queryExecutionId: z.string().optional(),
9498
nextToken: z.string().optional(),
99+
filter: FilterSchema.optional(),
95100
})
96101
)
97102
.output(
@@ -104,6 +109,8 @@ export function getPageViews(trpcInstance: TrpcInstance) {
104109
.query(async ({ input, ctx }) => {
105110
assertAuthentication(ctx);
106111

112+
if (input.filter && Object.keys(input.filter).length === 0) input.filter = undefined;
113+
107114
const athenaPageViews = new AthenaPageViews();
108115

109116
const fromDate = DateUtils.parseIso(input.from);
@@ -114,7 +121,8 @@ export function getPageViews(trpcInstance: TrpcInstance) {
114121
toDate,
115122
input.sites,
116123
input.queryExecutionId,
117-
input.nextToken
124+
input.nextToken,
125+
input.filter
118126
);
119127
return {
120128
queryExecutionId: pageViews.queryExecutionId,
@@ -140,12 +148,15 @@ export function getChartViews(trpcInstance: TrpcInstance) {
140148
sites: z.array(SchemaSite),
141149
period: z.enum(['hour', 'day']),
142150
timeZone: z.string(),
151+
filter: FilterSchema.optional(),
143152
})
144153
)
145154
.output(z.array(ChartViewsSchema))
146155
.query(async ({ input, ctx }) => {
147156
assertAuthentication(ctx);
148157

158+
if (input.filter && Object.keys(input.filter).length === 0) input.filter = undefined;
159+
149160
const athenaPageViews = new AthenaPageViews();
150161

151162
const fromDate = DateUtils.parseIso(input.from);
@@ -161,7 +172,8 @@ export function getChartViews(trpcInstance: TrpcInstance) {
161172
toDate,
162173
input.sites,
163174
input.period,
164-
input.timeZone
175+
input.timeZone,
176+
input.filter
165177
);
166178
return chartViews.map((row) => ({ ...row, date_key: DateUtils.stringifyIso(row.date_key) }));
167179
});
@@ -206,18 +218,21 @@ export function getPageReferrers(trpcInstance: TrpcInstance) {
206218
from: z.string().datetime(),
207219
to: z.string().datetime(),
208220
sites: z.array(SchemaSite),
221+
filter: FilterSchema.optional(),
209222
})
210223
)
211224
.output(z.array(PageReferrerSchema))
212225
.query(async ({ input, ctx }) => {
213226
assertAuthentication(ctx);
214227

228+
if (input.filter && Object.keys(input.filter).length === 0) input.filter = undefined;
229+
215230
const athenaPageViews = new AthenaPageViews();
216231

217232
const fromDate = DateUtils.parseIso(input.from);
218233
const toDate = DateUtils.parseIso(input.to);
219234

220-
const PageReferrers = await athenaPageViews.referrersForPeriod(fromDate, toDate, input.sites);
235+
const PageReferrers = await athenaPageViews.referrersForPeriod(fromDate, toDate, input.sites, input.filter);
221236
return PageReferrers;
222237
});
223238
}
@@ -243,18 +258,27 @@ export function getUsersGroupedByStatForPeriod(trpcInstance: TrpcInstance) {
243258
'utm_term',
244259
'utm_content',
245260
]), // TODO: Later: "browser", "os"
261+
filter: FilterSchema.optional(),
246262
})
247263
)
248264
.output(z.array(UsersGroupedByStatSchema))
249265
.query(async ({ input, ctx }) => {
250266
assertAuthentication(ctx);
251267

268+
if (input.filter && Object.keys(input.filter).length === 0) input.filter = undefined;
269+
252270
const athenaPageViews = new AthenaPageViews();
253271

254272
const fromDate = DateUtils.parseIso(input.from);
255273
const toDate = DateUtils.parseIso(input.to);
256274

257-
const data = await athenaPageViews.usersGroupedByStatForPeriod(fromDate, toDate, input.sites, input.groupBy);
275+
const data = await athenaPageViews.usersGroupedByStatForPeriod(
276+
fromDate,
277+
toDate,
278+
input.sites,
279+
input.groupBy,
280+
input.filter
281+
);
258282
return data;
259283
});
260284
}

src/src/backend/lib/dal/athena/page_views.ts

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { AthenaBase } from '@backend/lib/utils/athena_base';
33
import { getAthenaClient, getS3Client } from '@backend/lib/utils/lazy_aws';
44
import { LambdaEnvironment } from '@backend/api-front/environment';
55
import { Page } from '@backend/lib/models/page';
6+
import { Filter } from '@backend/lib/models/filter';
67

78
export class AthenaPageViews extends AthenaBase {
89
constructor() {
@@ -23,10 +24,22 @@ export class AthenaPageViews extends AthenaBase {
2324
* @param fromDate
2425
* @param toDate
2526
* @param sites
27+
* @param filter
2628
*/
27-
cteFilteredDataQuery(columns: string[], fromDate: Date, toDate: Date, sites: string[]) {
29+
cteFilteredDataQuery(columns: string[], fromDate: Date, toDate: Date, sites: string[], filter?: Filter) {
2830
const cteWhereClauseSites = sites.map((site) => `site = '${site}'`).join(' OR ');
29-
const cteWhereClause = `(${cteWhereClauseSites})`;
31+
let cteWhereClauseExtra = '';
32+
33+
if (filter) {
34+
const cteWhereClauseFilter = Object.entries(filter)
35+
.map(([key, value]) => {
36+
if (value === null) return `${key} IS NULL`;
37+
else return `${key} = '${value}'`;
38+
})
39+
.join(' AND ');
40+
cteWhereClauseExtra += ` AND (${cteWhereClauseFilter})`;
41+
}
42+
3043
const exactTimeFrom = DateUtils.stringifyFormat(fromDate, 'yyyy-MM-dd HH:mm:ss.SSS');
3144
const exactTimeTo = DateUtils.stringifyFormat(toDate, 'yyyy-MM-dd HH:mm:ss.SSS');
3245

@@ -35,8 +48,8 @@ export class AthenaPageViews extends AthenaBase {
3548
SELECT ${columns.join(', ')}, page_opened_at,
3649
ROW_NUMBER() OVER (PARTITION BY page_id ORDER BY time_on_page DESC) rn
3750
FROM page_views
38-
WHERE ${cteWhereClause} AND page_opened_at BETWEEN parse_datetime('${exactTimeFrom}','yyyy-MM-dd HH:mm:ss.SSS')
39-
AND parse_datetime('${exactTimeTo}','yyyy-MM-dd HH:mm:ss.SSS')
51+
WHERE (${cteWhereClauseSites}) AND page_opened_at BETWEEN parse_datetime('${exactTimeFrom}','yyyy-MM-dd HH:mm:ss.SSS')
52+
AND parse_datetime('${exactTimeTo}','yyyy-MM-dd HH:mm:ss.SSS') ${cteWhereClauseExtra}
4053
),
4154
cte_data_filtered AS (
4255
SELECT *
@@ -45,13 +58,14 @@ export class AthenaPageViews extends AthenaBase {
4558
)`;
4659
}
4760

48-
async totalsForPeriod(fromDate: Date, toDate: Date, sites: string[]) {
61+
async totalsForPeriod(fromDate: Date, toDate: Date, sites: string[], filter?: Filter) {
4962
const query = `
5063
WITH ${this.cteFilteredDataQuery(
5164
['user_id', 'session_id', 'page_id', 'time_on_page'],
5265
fromDate,
5366
toDate,
54-
sites
67+
sites,
68+
filter
5569
)},
5670
totals_basic AS (
5771
SELECT
@@ -98,14 +112,15 @@ export class AthenaPageViews extends AthenaBase {
98112
sites: string[],
99113
queryExecutionId?: string,
100114
nextToken?: string,
115+
filter?: Filter,
101116
limit = 1000
102117
) {
103118
if (queryExecutionId && !nextToken) {
104119
throw new Error('Cannot paginate results without a nextToken');
105120
}
106121

107122
const query = `
108-
WITH ${this.cteFilteredDataQuery(['site', 'page_url', 'time_on_page'], fromDate, toDate, sites)},
123+
WITH ${this.cteFilteredDataQuery(['site', 'page_url', 'time_on_page'], fromDate, toDate, sites, filter)},
109124
cte_data_by_page_view AS (
110125
SELECT
111126
site,
@@ -132,9 +147,16 @@ export class AthenaPageViews extends AthenaBase {
132147
};
133148
}
134149

135-
async chartViewsForPeriod(fromDate: Date, toDate: Date, sites: string[], period: 'hour' | 'day', timeZone: string) {
150+
async chartViewsForPeriod(
151+
fromDate: Date,
152+
toDate: Date,
153+
sites: string[],
154+
period: 'hour' | 'day',
155+
timeZone: string,
156+
filter?: Filter
157+
) {
136158
const query = `
137-
WITH ${this.cteFilteredDataQuery(['site', 'user_id', 'page_id'], fromDate, toDate, sites)}
159+
WITH ${this.cteFilteredDataQuery(['site', 'user_id', 'page_id'], fromDate, toDate, sites, filter)}
138160
SELECT
139161
site,
140162
CAST(DATE_TRUNC('${period}', page_opened_at AT TIME ZONE '${timeZone}') AS TIMESTAMP) as "date_key",
@@ -183,9 +205,9 @@ export class AthenaPageViews extends AthenaBase {
183205
// }[];
184206
// }
185207

186-
async referrersForPeriod(fromDate: Date, toDate: Date, sites: string[]) {
208+
async referrersForPeriod(fromDate: Date, toDate: Date, sites: string[], filter?: Filter) {
187209
const query = `
188-
WITH ${this.cteFilteredDataQuery(['referrer'], fromDate, toDate, sites)}
210+
WITH ${this.cteFilteredDataQuery(['referrer'], fromDate, toDate, sites, filter)}
189211
SELECT
190212
COALESCE(referrer, 'No Referrer') AS referrer,
191213
COUNT(*) as "views"
@@ -201,7 +223,7 @@ export class AthenaPageViews extends AthenaBase {
201223
}[];
202224
}
203225

204-
async usersGroupedByStatForPeriod(fromDate: Date, toDate: Date, sites: string[], stat: keyof Page) {
226+
async usersGroupedByStatForPeriod(fromDate: Date, toDate: Date, sites: string[], stat: keyof Page, filter?: Filter) {
205227
// Alternative query for getting country names grouped by visitors
206228
// TODO: Can be optimized to this. Scans half the amount of data then, will rework all of them later
207229
// SELECT
@@ -237,7 +259,7 @@ export class AthenaPageViews extends AthenaBase {
237259
// visitors DESC
238260

239261
const query = `
240-
WITH ${this.cteFilteredDataQuery(['user_id', stat], fromDate, toDate, sites)},
262+
WITH ${this.cteFilteredDataQuery(['user_id', stat], fromDate, toDate, sites, filter)},
241263
user_distinct_stat AS (
242264
SELECT
243265
user_id, ${stat},
src/src/backend/lib/models/filter.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import { z } from 'zod';
2+
3+
export const FilterSchema = z.object({
4+
page_url: z.string().optional(),
5+
referrer: z.string().optional().nullable(),
6+
country_name: z.string().optional(),
7+
device_type: z.string().optional(),
8+
utm_source: z.string().optional(),
9+
utm_medium: z.string().optional(),
10+
utm_campaign: z.string().optional(),
11+
utm_term: z.string().optional(),
12+
utm_content: z.string().optional(),
13+
});
14+
export type Filter = z.infer<typeof FilterSchema>;

0 commit comments

Comments
 (0)