vitepress publish validation

treeinfra · Jun 23, 2024 · 2883259 · 2883259
2 parents f156d44 + 7c8751d
commit 2883259
Show file tree

Hide file tree

Showing 13 changed files with 315 additions and 27 deletions.
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
@@ -0,0 +1,38 @@
+name: deploy using vitepress
+
+on:
+  push: {branches: [main]}
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+concurrency:
+  group: pages
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+      - uses: actions/configure-pages@v4
+      - run: npm install
+      - run: npm run doc.build
+      - uses: actions/upload-pages-artifact@v3
+        with: {path: .vitepress/dist}
+
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    needs: build
+    runs-on: ubuntu-latest
+    name: deploy
+    steps:
+      - name: deploy to github pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
@@ -11,6 +11,8 @@ index.js.map
 index.d.ts
 index.cjs
 index.js
+.vitepress/cache/
+.vitepress/dist/
 
 # Platform specified files.
 .DS_Store

diff --git a/.npmignore b/.npmignore
@@ -6,6 +6,9 @@ yarn.lock
 
 # Repo config files.
 .github/
+.vitepress/
+.vscode/
+docs/
 .gitattributes
 .prettierrc.yaml
 rollup.config.js

diff --git a/.vitepress/config.ts b/.vitepress/config.ts
@@ -0,0 +1,30 @@
+import {defineConfig} from "vitepress"
+import {wordless} from "../index"
+
+export default defineConfig({
+  markdown: {
+    config(md) {
+      md.use(wordless)
+    },
+  },
+  title: "Markdown-it Wordless",
+  themeConfig: {
+    socialLinks: [
+      {
+        icon: "github",
+        link: "https://github.com/treeinfra/markdown-it-wordless",
+      },
+    ],
+  },
+  locales: {
+    root: {label: "English", lang: "en", link: "/docs"},
+    zh: {
+      label: "简体中文",
+      lang: "zh",
+      link: "/docs/zh",
+      themeConfig: {
+        outline: {label: "目录"},
+      },
+    },
+  },
+})
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -0,0 +1,7 @@
+{
+  "recommendations": [
+    "esbenp.prettier-vscode",
+    "foxundermoon.shell-format",
+    "redhat.vscode-yaml"
+  ]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,14 @@
+{
+  "editor.unicodeHighlight.allowedLocales": {
+    "zh-hans": true,
+    "zh-hant": true,
+    "ja": true,
+    "ko": true,
+    "ja-kana": true,
+    "ja-kana-ext": true,
+    "ja-kana-ext-phonetic": true,
+    "ja-kana-ext-phonetic-ext": true,
+    "ja-kana-ext-phonetic-ext-compat": true,
+    "ja-kana-ext-phonetic-ext-compat-ext": true
+  }
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## v1.1.0
+
+- Optimization for emoji spaces.
+- Optimization for Chinese and Japanese special punctuations.
+- VitePress documentation deployment.
+- Add npm topic tags.
+
 ## v1.0.0
 
 - Add MIT License to node manifest.

diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 # Markdown-it Wordless
 
-A markdown-it plugin to optimize wordless multi-language line-break render.
+A [markdown-it](https://markdown-it.github.io) plugin
+to optimize wordless multi-language line-break render.
 
 When a paragraph is long in markdown, we usually separate them into lines,
 and it will finally be rendered into a single line inside HTML.
@@ -31,7 +32,7 @@ import {Options} from "markdown-it-wordless"
 md.use(wordless)
 ```
 
-## Basic functions
+## Basic rules
 
 1. Wordful languages (such as English and Arabic) will be rendered as usual.
 2. It won't add a space when line break between the same wordless language.
@@ -87,14 +88,16 @@ Such optimization is unnecessary in most cases,
 because this plugin will not slow down the rendering process a lot
 in common cases (only a few milliseconds).
 And if you do want to customize,
-please make sure you've understand the source code.
-Please refer to [`data.ts`](./data.ts) for more details,
+please make sure you understand the source code. Please refer to
+[`data.ts`](https://github.com/treeinfra/markdown-it-wordless/blob/main/data.ts)
+for more details,
 and here's documentation for each item in details.
 
 ## About the supported languages
 
 You can find all supported languages
-in the source code of [`data.ts`](./data.ts).
+in the source code of
+[`data.ts`](https://github.com/treeinfra/markdown-it-wordless/blob/main/data.ts).
 Each language or language series is an exported const
 that you can import and call.
 

diff --git a/data.ts b/data.ts
@@ -47,14 +47,23 @@ export type Options = {
 export const commonWords: LanguageRanges = [[0x0000, 0x0dff]]
 
 /**
- * Emoji is special ones:
- * Soft spaces between emojis will be kept,
- * while it won't add spaces between emojis and wordless languages.
- * And as it's special, the process of emoji is build-in,
- * and you should not include it inside the {@link Options}.
+ * Emoji is special:
+ * 1. A line break between an emoji and a wordful language becomes a space.
+ * 2. A line break between an emoji and a wordless language adds no space.
  */
 export const emoji: LanguageRanges = [[0x1f000, 0x1fbff]]
 
+/**
+ * Chinese and Japanese punctuations (中文和日文标点/日本語と中国語の標点):
+ * Never add spaces beside such punctuations.
+ */
+export const chineseAndJapanesePunctuations: LanguageRanges = [
+  [0x3000, 0x303f], // 基本标点/基本標点
+  [0xfe10, 0xfe1f], // 竖排标点/縦書き標点
+  [0xfe30, 0xfe4f], // 竖排标点扩展/縦書き記号の拡張
+  [0xff00, 0xffef], // 全角标点/全角標点
+]
+
 /**
  * Chinese and Japanese characters (中文和日文/日本語と中国語).
  *
@@ -77,6 +86,7 @@ export const chineseAndJapanese: LanguageRanges = [
   // [0x3040, 0x309f], // 日文平假名/平仮名ひらがな
   // [0x30a0, 0x30ff], // 日文片假名/片仮名カタカナ
   [0x3040, 0x30ff],
+
   [0x3100, 0x312f], // 传统拼音注音符号(ㄆㄧㄣ ㄧㄣ)
   [0x3190, 0x319f], // 甲乙丙丁天地人...
   [0x31a0, 0x31bf], // 传统拼音注音字母(ㄆㄧㄣ ㄧㄣ)
@@ -91,9 +101,6 @@ export const chineseAndJapanese: LanguageRanges = [
   [0x31c0, 0x9fff],
 
   [0xf900, 0xfaff], // 兼容汉字/コンパチブル漢字の拡張
-  [0xfe10, 0xfe1f], // 竖排标点/縦書き記号
-  [0xfe30, 0xfe4f], // 竖排标点扩展/縦書き記号の拡張
-  [0xff00, 0xffef], // 全角符号/全角記号
   [0x1aff0, 0x1b16f], // 日文假名扩展/仮名の拡張
 
   // [0x1d300, 0x1d35f], // 太玄经符号/太玄經の記号
@@ -188,7 +195,10 @@ export const allWordless: LanguageRanges[] = [
  * @returns Index of the character in the given wordless language series,
  * if there's not {@link Range} contains such code,
  * it means this is not a character of a wordless language,
- * and it will return -1. And if it's an emoji, it will return -2.
+ * and it will return -1.
+ *
+ * There are also special return values: an emoji returns -2,
+ * and Chinese or Japanese punctuation returns -3.
  */
 export function langIndexOf(code: number, options?: Options): number {
   options = {
@@ -203,11 +213,14 @@ export function langIndexOf(code: number, options?: Options): number {
     }
   }
 
+  // Process Chinese and Japanese punctuations.
+  for (const range of chineseAndJapanesePunctuations) {
+    if (code >= range[0] && code <= range[1]) return -3
+  }
+
   // Process Emoji.
-  for (const ranges of emoji) {
-    for (const range of ranges) {
-      if (code >= range[0] && code <= range[1]) return -2
-    }
+  for (const range of emoji) {
+    if (code >= range[0] && code <= range[1]) return -2
   }
 
   // Process wordless language index.
@@ -220,3 +233,11 @@ export function langIndexOf(code: number, options?: Options): number {
   }
   return -1
 }
+
+if (import.meta.vitest) {
+  const {expect, test} = import.meta.vitest
+
+  test("basic function", function () {
+    expect(langIndexOf("，".charCodeAt(0))).toBe(-3)
+  })
+}
diff --git a/docs/index.md b/docs/index.md
@@ -0,0 +1,113 @@
+# Markdown-it Wordless
+
+A [markdown-it](https://markdown-it.github.io) plugin
+to optimize wordless multi-language line-break render.
+
+When a paragraph is long in markdown, we usually separate them into lines,
+and it will finally be rendered into a single line inside HTML.
+But for wordless languages (such as Chinese and Japanese),
+they do not use spaces to separate words,
+so no space needs to be added when processing a line break.
+
+If you are only working with a single wordless language,
+you can definitely use the following code,
+which will disable all spaces when line break
+(render single `\n` into an empty string rather than a space):
+
+```ts
+import md from "markdown-it"
+md.renderer.rules.softbreak = () => ""
+```
+
+But once working with multi-languages,
+especially when there's a mix of wordless and wordful languages,
+such as using Chinese and English in a single markdown document,
+such options cannot handle all cases.
+So here comes this `"markdown-it-wordless"` plugin,
+and you can use it like this:
+
+```ts
+import md from "markdown-it"
+import {wordless} from "markdown-it-wordless"
+md.use(wordless)
+```
+
+## Basic rules
+
+1. Wordful languages (such as English and Arabic) will be rendered as usual.
+2. It won't add a space for a line break within the same wordless language.
+3. It will add a space for a line break between different wordless languages.
+4. Specially, Chinese and Japanese will be treated as the same language,
+   as there are many shared characters between them,
+   and their character styles are almost the same.
+5. Although Korean characters are like Chinese and Japanese (CJK),
+   Korean is not a wordless language, it uses spaces to separate words.
+
+## Use it with VitePress
+
+[VitePress](https://vitepress.dev) is an excellent static site generator,
+and this package was inspired by the author's own use of VitePress.
+It's strongly recommended to add this plugin to VitePress
+if you are using wordless languages. And here's how to config:
+
+```ts
+// <root>/.vitepress/config.ts
+import {defineConfig} from "vitepress"
+import {wordless} from "markdown-it-wordless"
+
+export default defineConfig({
+  markdown: {
+    config(md) {
+      md.use(wordless)
+    },
+  },
+  // Other configs...
+})
+```
+
+## Customize to optimize performance
+
+The default option will enable optimization
+for all registered wordless languages inside this package.
+If you want to optimize performance,
+you can specify what exactly wordless language you are using.
+You may also specify what wordful language you are using,
+because there's only optimization for wordful languages
+whose Unicode code points are within `0x0000`–`0x0dff`.
+
+Here's a simple example
+if you will only use Chinese or Japanese as wordless languages:
+
+```ts
+import md from "markdown-it"
+import {wordless, chineseAndJapanese, Options} from "markdown-it-wordless"
+md.use<Options>(wordless, {supportWordless: [chineseAndJapanese]})
+```
+
+Such optimization is unnecessary in most cases,
+because this plugin will not slow down the rendering process a lot
+in common cases (only a few milliseconds).
+And if you do want to customize,
+please make sure you understand the source code. Please refer to
+[`data.ts`](https://github.com/treeinfra/markdown-it-wordless/blob/main/data.ts)
+for more details,
+and here's documentation for each item in detail.
+
+## About the supported languages
+
+You can find all supported languages
+in the source code of
+[`data.ts`](https://github.com/treeinfra/markdown-it-wordless/blob/main/data.ts).
+Each language or language series is an exported const
+that you can import and call.
+
+The language series are based on [Unicode](https://unicode.org/charts/).
+Most of the languages are coded manually and some of them are
+generated by several AI models, so there might be mistakes,
+and the author cannot guarantee the accuracy of the data
+because it's almost impossible for a single person to learn all such languages.
+
+If you are a native speaker of one of those wordless languages
+and you find there are some mistakes,
+or if there are even some wordless languages not included in this package,
+please feel free to open an issue.
diff --git a/docs/zh/index.md b/docs/zh/index.md
@@ -0,0 +1,29 @@
+# Markdown-it 换行空格优化插件
+
+包括中文在内的很多语言文字不像英文那样使用空格来分割词汇。
+在使用 Markdown 时，遇到段落很长，通常会将其分割成很多行。
+但 Markdown 在渲染时会默认将换行渲染为空格，
+而这样的空格在中文这种不用空格分割词汇的语言中显然是不合适的。
+
+```ts
+import md from "markdown-it"
+md.renderer.rules.softbreak = () => ""
+```
+
+在使用 [markdown-it](https://markdown-it.github.io) 时，
+可以通过上面的配置让 Markdown 中的单个换行符渲染为空字符串而非空格，
+但这样一来，对像英语这种需要用空格来分割单词的语言又会出问题。
+即在多语言文档，尤其是同时存在
+像中文这样不用空格分割词汇的语言 (wordless language)
+和像英语这样需要用空格来分割单词的语言 (wordful language) 时，
+这种简单的配置就不起作用了。
+
+所以作者才写了这个插件来处理这种问题：
+使用这个插件后，使用 Markdown 编辑中文这样的语言时，
+就可以随意地换行，而不必担心句子里被添加不美观空格的问题了。
+
+```ts
+import md from "markdown-it"
+import {wordless} from "markdown-it-wordless"
+md.use(wordless)
+```