Skip to content

EA Forum Data Import #838

EA Forum Data Import

EA Forum Data Import #838

Workflow file for this run

# Workflow identity and triggers.
name: EA Forum Data Import
on:
  # Run on pushes to main that touch the flat.yml workflow file
  # (presumably this file - confirm against the repository layout).
  push:
    branches:
      - main
    paths:
      - .github/workflows/flat.yml
  # Allow manual runs; no inputs are declared.
  workflow_dispatch: null
  # Run once a day at minute 0 of hour 3.
  schedule:
    - cron: '0 3 * * *'
jobs:
  # Single job: fetch posts from the EA Forum GraphQL endpoint into
  # posts.json, then load that file into Neo4j.
  scheduled:
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): this action and flat-graph below are referenced by the
      # mutable `main` branch; consider pinning to a tag or commit SHA.
      - name: Setup deno
        uses: denoland/setup-deno@main
        with:
          deno-version: v1.10.x
      - name: Check out repo
        uses: actions/checkout@v2
      # The GraphQL query is URL-encoded into http_url; it requests up to
      # 1000 posts together with their tags. Quoted so the braces and colons
      # in the query can never be interpreted as YAML syntax.
      - name: Fetch data
        uses: githubocto/flat@v3
        with:
          http_url: 'https://forum.effectivealtruism.org/graphql?query={%20posts(input:%20{terms:%20{limit:%201000}})%20{%20results%20{%20_id%20postedAt%20modifiedAt%20title%20question%20meta%20lastCommentedAt%20pageUrl%20htmlBody%20shortform%20tagRelevance%20voteCount%20baseScore%20extendedScore%20score%20pingbacks%20tags%20{%20createdAt%20name%20slug%20description%20{%20html%20}%20postCount%20lastVisitedAt%20_id%20}%20}%20}%20}'
          downloaded_filename: posts.json
      - name: Neo4j import
        uses: quinn-p-mchugh/flat-graph@main
        with:
          neo4j-user: ${{secrets.NEO4J_USER}}
          neo4j-password: ${{secrets.NEO4J_PASSWORD}}
          neo4j-uri: ${{secrets.NEO4J_URI}}
          filename: posts.json
          cypher-run-method: apoc.periodic.iterate
          apoc-iterate-batchSize: 1
          # Outer statement: emits one row per post in the downloaded JSON.
          apoc-iterate-cypherIterate: |
            UNWIND $value.data.posts.results AS post RETURN post
          # Inner statement, run for each post row:
          #   1. Upsert the Post node. Fields under ON CREATE SET are written
          #      only when the node is first created; the rest are refreshed
          #      on every run.
          #   2. Upsert each Tag and its APPLIES_TO relationship. FOREACH is
          #      used instead of UNWIND so a post with no tags still reaches
          #      step 3 (UNWIND of an empty list yields zero rows), and so
          #      step 3 runs once per post rather than once per tag.
          #      tagRelevance is SET on the relationship rather than being
          #      part of the MERGE pattern, so a changed relevance updates
          #      the existing relationship instead of adding a duplicate,
          #      and a missing relevance does not abort the MERGE.
          #   3. Link the post to every post listed in pingbacks.Posts.
          # The scalar is folded (>) into a single line, so do not put
          # Cypher `//` comments inside it.
          apoc-iterate-cypherAction: >
            MERGE (p:Post {_id: post._id})
            ON CREATE SET
            p.postedAt = DateTime(post.postedAt),
            p.htmlBody = post.htmlBody,
            p.shortform = toBoolean(post.shortform),
            p.question = toBoolean(post.question),
            p.meta = post.meta
            SET
            p.pageUrl = post.pageUrl,
            p.modifiedAt = DateTime(post.modifiedAt),
            p.title = post.title,
            p.lastCommentedAt = DateTime(post.lastCommentedAt),
            p.voteCount = post.voteCount,
            p.baseScore = post.baseScore,
            p.extendedScore = post.extendedScore,
            p.score = post.score
            FOREACH (tag IN coalesce(post.tags, []) |
            MERGE (t:Tag {_id: tag._id})
            ON CREATE SET
            t.createdAt = DateTime(tag.createdAt),
            t.description = tag.description.html,
            t.slug = tag.slug
            SET
            t.name = tag.name,
            t.postCount = tag.postCount,
            t.lastVisitedAt = DateTime(tag.lastVisitedAt)
            MERGE (t)-[r:APPLIES_TO]->(p)
            SET r.tagRelevance = post.tagRelevance[tag._id]
            )
            WITH post, p
            UNWIND post.pingbacks.Posts AS pingbackPostId
            MERGE (pb:Post {_id: pingbackPostId})
            MERGE (p)-[:MENTIONED_IN]->(pb)