-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4220 from thematters/feat/tag-aglo
- Loading branch information
Showing
20 changed files
with
378 additions
and
262 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
const materialized = 'tag_stats_materialized' | ||
|
||
exports.up = async (knex) => { | ||
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${materialized}`) | ||
|
||
await knex.raw(` | ||
CREATE MATERIALIZED VIEW ${materialized} AS | ||
WITH article_tags AS ( | ||
SELECT | ||
at.tag_id, | ||
at.article_id, | ||
article.author_id | ||
FROM article_tag at | ||
INNER JOIN article | ||
ON article.id = at.article_id | ||
INNER JOIN "user" u | ||
ON u.id = article.author_id | ||
WHERE article.state = 'active' | ||
AND u.state NOT IN ('forzen', 'archived') | ||
AND u.id NOT IN ( | ||
SELECT user_id | ||
FROM user_restriction | ||
) | ||
AND at.created_at AT TIME ZONE 'Asia/Taipei' >= NOW() - INTERVAL '12 months' | ||
AND at.created_at AT TIME ZONE 'Asia/Taipei' < NOW() | ||
), | ||
tag_stats AS ( | ||
SELECT | ||
tag_id, | ||
COUNT(article_id)::INT AS all_articles, | ||
COUNT(DISTINCT author_id)::INT AS all_users | ||
FROM article_tags | ||
GROUP BY tag_id | ||
), | ||
user_threshold AS ( | ||
SELECT DISTINCT | ||
PERCENTILE_CONT(0.15) WITHIN GROUP (ORDER BY all_users) AS threshold | ||
FROM tag_stats | ||
) | ||
SELECT | ||
ts.tag_id, | ||
tag.content, | ||
ts.all_articles, | ||
ts.all_users | ||
FROM tag_stats ts | ||
INNER JOIN tag | ||
ON tag.id = ts.tag_id | ||
CROSS JOIN user_threshold ut | ||
WHERE ts.all_users > ut.threshold | ||
ORDER BY | ||
ts.all_users DESC, | ||
ts.all_articles DESC | ||
`) | ||
} | ||
|
||
exports.down = async (knex) => { | ||
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${materialized}`) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
const table = 'article_stats_materialized' | ||
|
||
exports.up = async (knex) => { | ||
await knex.raw(` | ||
CREATE MATERIALIZED VIEW ${table} AS | ||
SELECT | ||
COALESCE(r.article_id, c.reference_id) as article_id, | ||
COALESCE(r.reads, 0) as reads, | ||
COALESCE(c.claps, 0) as claps | ||
FROM ( | ||
SELECT article_id, sum(timed_count)::int AS reads | ||
FROM article_read_count | ||
WHERE user_id IS NOT NULL | ||
GROUP BY article_id | ||
) r | ||
FULL OUTER JOIN ( | ||
SELECT reference_id, sum(amount)::int AS claps | ||
FROM appreciation | ||
WHERE purpose = 'appreciate' | ||
GROUP BY reference_id | ||
) c ON r.article_id = c.reference_id | ||
`) | ||
} | ||
|
||
exports.down = async (knex) => { | ||
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${table}`) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
const table = 'tag_hottest_materialized' | ||
|
||
exports.up = async (knex) => { | ||
await knex.raw(` | ||
CREATE MATERIALIZED VIEW ${table} AS | ||
WITH raw_article_data AS ( | ||
SELECT | ||
at.tag_id, | ||
to_char(article.created_at, 'YYYYMM') AS month, | ||
at.article_id, | ||
article.author_id | ||
FROM article_tag AS at | ||
INNER JOIN article ON article.id = at.article_id | ||
INNER JOIN "user" AS u ON u.id = article.author_id | ||
WHERE article.state = 'active' | ||
AND u.state NOT in('frozen', 'archived') | ||
AND u.id NOT in(SELECT user_id FROM user_restriction) | ||
), | ||
monthly_stats AS ( | ||
SELECT tag_id, month, | ||
count(article_id)::int articles, | ||
count(DISTINCT author_id)::int users | ||
FROM raw_article_data | ||
GROUP BY tag_id, month | ||
), | ||
tag_averages AS ( | ||
SELECT tag_id, | ||
count(month) AS months, | ||
avg(articles) AS mean_articles, | ||
avg(users) AS mean_users | ||
FROM monthly_stats | ||
GROUP BY tag_id | ||
), | ||
tag_z_scores AS ( | ||
SELECT tag_id, months, | ||
(months - avg(months) OVER()) / NULLIF(stddev(months) OVER(), 0) AS z_months, | ||
mean_articles, | ||
(mean_articles - avg(mean_articles) OVER()) / NULLIF(stddev(mean_articles) OVER(), 0) AS z_articles, | ||
mean_users, | ||
(mean_users - avg(mean_users) OVER()) / NULLIF(stddev(mean_users) OVER(), 0) AS z_users | ||
FROM tag_averages | ||
), | ||
significant_scores AS ( | ||
SELECT tag_id, months, | ||
CASE WHEN z_months < 2 THEN 0 ELSE z_months END AS z_months, | ||
mean_articles, | ||
CASE WHEN z_articles < 2 THEN 0 ELSE z_articles END AS z_articles, | ||
mean_users, | ||
CASE WHEN z_users < 2 THEN 0 ELSE z_users END AS z_users | ||
FROM tag_z_scores | ||
) | ||
SELECT | ||
base.tag_id, | ||
tag.content, | ||
base.months, | ||
base.mean_articles, | ||
base.mean_users, | ||
base.score | ||
FROM ( | ||
SELECT *, z_months * z_articles * z_users AS score | ||
FROM significant_scores | ||
) AS base | ||
INNER JOIN tag ON tag.id = base.tag_id | ||
WHERE base.score > 0 | ||
ORDER BY base.score DESC | ||
`) | ||
} | ||
|
||
exports.down = async (knex) => { | ||
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${table}`) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.