Skip to content

Commit

Permalink
CLDR-17935 site: add crawler sitemap (unicode-org#4028)
Browse files Browse the repository at this point in the history
  • Loading branch information
srl295 authored and haytenf committed Sep 17, 2024
1 parent c91af61 commit eaba9c3
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 4 deletions.
2 changes: 2 additions & 0 deletions docs/site/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/node_modules
/assets/json
/assets/vendor
/sitemap.xml
/sitemap.md
4 changes: 1 addition & 3 deletions docs/site/_layouts/page.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
</div>
</div>
<div class="message"><i>This navigation UI is temporary, just to give access to the pages.</i></div>

<!-- <div class="bar"><a href="http://www.unicode.org" class="bar">Home</a>
| <a href="http://www.unicode.org/search/" class="bar">Search</a></div> -->
<!-- <div class="bar"><a href="/sitemap" class="bar">Site Map</a></div> -->
</header>


Expand Down
49 changes: 48 additions & 1 deletion docs/site/assets/js/build.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import * as fs from "node:fs/promises";
import * as path from "node:path";
import { default as process } from "node:process";
import { default as matter } from "gray-matter";
import { SitemapStream, streamToPromise } from "sitemap";
import { Readable } from "node:stream";

const SKIP_THESE = /(node_modules|\.jekyll-cache)/;
const SKIP_THESE = /(node_modules|\.jekyll-cache|^sitemap.*)/;

async function processFile(d, fullPath, out) {
const f = await fs.readFile(fullPath, "utf-8");
Expand Down Expand Up @@ -40,6 +42,49 @@ async function traverse(d, out) {
return Promise.all(promises);
}

/**
 * Strip a trailing ".md" extension from a path, e.g. a/b/c.md becomes a/b/c.
 * A path that does not end in ".md" is returned unchanged.
 *
 * @param {string} p - path to strip
 * @returns {string} the path without its final ".md"
 */
function dropmd(p) {
  const MD_SUFFIX = ".md";
  if (!p.endsWith(MD_SUFFIX)) {
    return p;
  }
  return p.slice(0, -MD_SUFFIX.length);
}

/**
 * Build the crawler sitemap from the collected pages and write it to
 * ./sitemap.xml (relative to the current working directory).
 *
 * @param {{ all: Array<{ fullPath: string, title: * }> }} out - collected
 *   pages; each entry's fullPath is stat'ed for its modification time and
 *   turned into a URL with the ".md" extension removed
 * @returns {Promise<void>} resolves once sitemap.xml has been written
 */
async function writeSiteMaps(out) {
  // One sitemap entry per page: extensionless URL plus the file's mtime.
  const toEntry = async ({ fullPath }) => {
    const { mtime } = await fs.stat(fullPath);
    return {
      url: dropmd(`/${fullPath}`),
      lastmod: mtime.toISOString(),
    };
  };
  const links = await Promise.all(out.all.map(toEntry));

  // Feed the entries through the sitemap serializer and collect the XML.
  const sitemap = new SitemapStream({ hostname: "https://cldr.unicode.org" });
  const xmlBuffer = await streamToPromise(Readable.from(links).pipe(sitemap));
  await fs.writeFile("./sitemap.xml", xmlBuffer.toString(), "utf-8");
  console.log("Wrote sitemap.xml");

  // Disabled: generation of a Markdown site map (./sitemap.md) listing every
  // page sorted by path. Kept for reference.
  /*
  const coll = new Intl.Collator(["und"]);
  const allSorted = [...out.all].sort((a, b) =>
    coll.compare(a.fullPath, b.fullPath)
  );
  await fs.writeFile(
    "./sitemap.md",
    `---\ntitle: Site Map\n---\n\n` +
      allSorted
        .map(
          ({ fullPath, title }) =>
            `- [/${fullPath}](/${dropmd(fullPath)}) - ${title}`
        )
        .join("\n"),
    "utf-8"
  );
  console.log("Wrote sitemap.md");
  */
}

async function main() {
const out = {
all: [],
Expand All @@ -48,6 +93,8 @@ async function main() {
await fs.mkdir("assets/json/", { recursive: true });
await traverse(".", out);
await fs.writeFile("assets/json/tree.json", JSON.stringify(out, null, " "));
console.log("Wrote assets/json/tree.json");
await writeSiteMaps(out);
}

main().then(
Expand Down
42 changes: 42 additions & 0 deletions docs/site/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions docs/site/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"private": true,
"dependencies": {
"gray-matter": "^4.0.3",
"sitemap": "^8.0.0",
"vue": "^3.5.0"
}
}
1 change: 1 addition & 0 deletions docs/site/robots.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Sitemap: https://cldr.unicode.org/sitemap.xml

0 comments on commit eaba9c3

Please sign in to comment.