Skip to content

Commit

Permalink
Merge pull request #4220 from thematters/feat/tag-aglo
Browse files Browse the repository at this point in the history
  • Loading branch information
pauljusti authored Nov 18, 2024
2 parents bedd0f4 + 73cfa06 commit dd42a36
Show file tree
Hide file tree
Showing 20 changed files with 378 additions and 262 deletions.
58 changes: 58 additions & 0 deletions db/migrations/20241112112619_create_tag_stats_view.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
const materialized = 'tag_stats_materialized'

exports.up = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${materialized}`)

await knex.raw(`
CREATE MATERIALIZED VIEW ${materialized} AS
WITH article_tags AS (
SELECT
at.tag_id,
at.article_id,
article.author_id
FROM article_tag at
INNER JOIN article
ON article.id = at.article_id
INNER JOIN "user" u
ON u.id = article.author_id
WHERE article.state = 'active'
AND u.state NOT IN ('forzen', 'archived')
AND u.id NOT IN (
SELECT user_id
FROM user_restriction
)
AND at.created_at AT TIME ZONE 'Asia/Taipei' >= NOW() - INTERVAL '12 months'
AND at.created_at AT TIME ZONE 'Asia/Taipei' < NOW()
),
tag_stats AS (
SELECT
tag_id,
COUNT(article_id)::INT AS all_articles,
COUNT(DISTINCT author_id)::INT AS all_users
FROM article_tags
GROUP BY tag_id
),
user_threshold AS (
SELECT DISTINCT
PERCENTILE_CONT(0.15) WITHIN GROUP (ORDER BY all_users) AS threshold
FROM tag_stats
)
SELECT
ts.tag_id,
tag.content,
ts.all_articles,
ts.all_users
FROM tag_stats ts
INNER JOIN tag
ON tag.id = ts.tag_id
CROSS JOIN user_threshold ut
WHERE ts.all_users > ut.threshold
ORDER BY
ts.all_users DESC,
ts.all_articles DESC
`)
}

exports.down = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${materialized}`)
}
27 changes: 27 additions & 0 deletions db/migrations/20241112165500_create_article_stats_view.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const table = 'article_stats_materialized'

exports.up = async (knex) => {
await knex.raw(`
CREATE MATERIALIZED VIEW ${table} AS
SELECT
COALESCE(r.article_id, c.reference_id) as article_id,
COALESCE(r.reads, 0) as reads,
COALESCE(c.claps, 0) as claps
FROM (
SELECT article_id, sum(timed_count)::int AS reads
FROM article_read_count
WHERE user_id IS NOT NULL
GROUP BY article_id
) r
FULL OUTER JOIN (
SELECT reference_id, sum(amount)::int AS claps
FROM appreciation
WHERE purpose = 'appreciate'
GROUP BY reference_id
) c ON r.article_id = c.reference_id
`)
}

exports.down = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${table}`)
}
71 changes: 71 additions & 0 deletions db/migrations/20241113094622_create_tag_hottest_view.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
const table = 'tag_hottest_materialized'

exports.up = async (knex) => {
await knex.raw(`
CREATE MATERIALIZED VIEW ${table} AS
WITH raw_article_data AS (
SELECT
at.tag_id,
to_char(article.created_at, 'YYYYMM') AS month,
at.article_id,
article.author_id
FROM article_tag AS at
INNER JOIN article ON article.id = at.article_id
INNER JOIN "user" AS u ON u.id = article.author_id
WHERE article.state = 'active'
AND u.state NOT in('frozen', 'archived')
AND u.id NOT in(SELECT user_id FROM user_restriction)
),
monthly_stats AS (
SELECT tag_id, month,
count(article_id)::int articles,
count(DISTINCT author_id)::int users
FROM raw_article_data
GROUP BY tag_id, month
),
tag_averages AS (
SELECT tag_id,
count(month) AS months,
avg(articles) AS mean_articles,
avg(users) AS mean_users
FROM monthly_stats
GROUP BY tag_id
),
tag_z_scores AS (
SELECT tag_id, months,
(months - avg(months) OVER()) / NULLIF(stddev(months) OVER(), 0) AS z_months,
mean_articles,
(mean_articles - avg(mean_articles) OVER()) / NULLIF(stddev(mean_articles) OVER(), 0) AS z_articles,
mean_users,
(mean_users - avg(mean_users) OVER()) / NULLIF(stddev(mean_users) OVER(), 0) AS z_users
FROM tag_averages
),
significant_scores AS (
SELECT tag_id, months,
CASE WHEN z_months < 2 THEN 0 ELSE z_months END AS z_months,
mean_articles,
CASE WHEN z_articles < 2 THEN 0 ELSE z_articles END AS z_articles,
mean_users,
CASE WHEN z_users < 2 THEN 0 ELSE z_users END AS z_users
FROM tag_z_scores
)
SELECT
base.tag_id,
tag.content,
base.months,
base.mean_articles,
base.mean_users,
base.score
FROM (
SELECT *, z_months * z_articles * z_users AS score
FROM significant_scores
) AS base
INNER JOIN tag ON tag.id = base.tag_id
WHERE base.score > 0
ORDER BY base.score DESC
`)
}

exports.down = async (knex) => {
await knex.raw(`DROP MATERIALIZED VIEW IF EXISTS ${table}`)
}
22 changes: 12 additions & 10 deletions schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,19 @@ type Mutation {
"""Edit an article."""
editArticle(input: EditArticleInput!): Article!

"""Subscribe or Unsubscribe article"""
toggleSubscribeArticle(input: ToggleItemInput!): Article!
"""Bookmark or unbookmark article"""
toggleSubscribeArticle(input: ToggleItemInput!): Article! @deprecated(reason: "Use toggleBookmarkArticle instead")
toggleBookmarkArticle(input: ToggleItemInput!): Article!

"""Appreciate an article."""
appreciateArticle(input: AppreciateArticleInput!): Article!

"""Read an article."""
readArticle(input: ReadArticleInput!): Article!

"""Follow or unfollow tag."""
toggleFollowTag(input: ToggleItemInput!): Tag!
"""Bookmark or unbookmark tag."""
toggleFollowTag(input: ToggleItemInput!): Tag! @deprecated(reason: "Use toggleBookmarkTag instead")
toggleBookmarkTag(input: ToggleItemInput!): Tag!
toggleArticleRecommend(input: ToggleRecommendInput!): Article!
updateArticleState(input: UpdateArticleStateInput!): Article!
updateArticleSensitive(input: UpdateArticleSensitiveInput!): Article!
Expand Down Expand Up @@ -362,9 +364,6 @@ type Article implements Node & PinnableWork {
"""Total number of readers of this article."""
readerCount: Int!

"""Subscribers of this article."""
subscribers(input: ConnectionArgs!): UserConnection!

"""Limit the nuhmber of appreciate per user."""
appreciateLimit: Int!

Expand All @@ -377,8 +376,9 @@ type Article implements Node & PinnableWork {
"""This value determines if current viewer can SuperLike or not."""
canSuperLike: Boolean!

"""This value determines if current Viewer has subscribed of not."""
subscribed: Boolean!
"""This value determines if current Viewer has bookmarked of not."""
subscribed: Boolean! @deprecated(reason: "Use bookmarked instead")
bookmarked: Boolean!

"""
This value determines if this article is an author selected article or not.
Expand Down Expand Up @@ -519,6 +519,9 @@ type Tag implements Node {
"""Tags recommended based on relations to current tag."""
recommended(input: ConnectionArgs!): TagConnection!

"""Authors recommended based on relations to current tag."""
recommendedAuthors(input: ConnectionArgs!): UserConnection!

"""Counts of this tag."""
numArticles: Int!
numAuthors: Int!
Expand Down Expand Up @@ -709,7 +712,6 @@ input TagArticlesInput {
after: String
first: Int
oss: Boolean
selected: Boolean
sortBy: TagArticlesSortBy = byCreatedAtDesc
}

Expand Down
6 changes: 3 additions & 3 deletions src/common/enums/table.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
export enum VIEW {
tag_count_view = 'tag_count_view',
user_reader_view = 'user_reader_view',
article_count_view = 'article_count_view',
article_hottest_view = 'article_hottest_view',
transaction_delta_view = 'transaction_delta_view',
article_value_view = 'article_value_view',
tags_lasts_view = 'mat_views.tags_lasts',
}

export enum MATERIALIZED_VIEW {
tag_count_materialized = 'tag_count_materialized',
tags_lasts_view_materialized = 'mat_views.tags_lasts_view_materialized',
tag_stats_materialized = 'tag_stats_materialized',
tag_hottest_materialized = 'tag_hottest_materialized',
article_stats_materialized = 'article_stats_materialized',
user_reader_materialized = 'user_reader_materialized',
featured_comment_materialized = 'featured_comment_materialized',
curation_tag_materialized = 'curation_tag_materialized',
Expand Down
Loading

0 comments on commit dd42a36

Please sign in to comment.