diff --git a/docs/en/chdb/data-formats.md b/docs/en/chdb/data-formats.md index 954dc3f43b9..dcbcc3d6f62 100644 --- a/docs/en/chdb/data-formats.md +++ b/docs/en/chdb/data-formats.md @@ -18,83 +18,113 @@ As well as the data formats that ClickHouse supports, chDB also supports: The supported data formats from ClickHouse are: -| Format | Input | Output | -|---------------------------------|-------|--------| -| TabSeparated | ✔ | ✔ | -| TabSeparatedRaw | ✔ | ✔ | -| TabSeparatedWithNames | ✔ | ✔ | -| TabSeparatedWithNamesAndTypes | ✔ | ✔ | -| TabSeparatedRawWithNames | ✔ | ✔ | -| TabSeparatedRawWithNamesAndTypes| ✔ | ✔ | -| Template | ✔ | ✔ | -| TemplateIgnoreSpaces | ✔ | ✗ | -| CSV | ✔ | ✔ | -| CSVWithNames | ✔ | ✔ | -| CSVWithNamesAndTypes | ✔ | ✔ | -| CustomSeparated | ✔ | ✔ | -| CustomSeparatedWithNames | ✔ | ✔ | -| CustomSeparatedWithNamesAndTypes| ✔ | ✔ | -| SQLInsert | ✗ | ✔ | -| Values | ✔ | ✔ | -| Vertical | ✗ | ✔ | -| JSON | ✔ | ✔ | -| JSONAsString | ✔ | ✗ | -| JSONStrings | ✔ | ✔ | -| JSONColumns | ✔ | ✔ | -| JSONColumnsWithMetadata | ✔ | ✔ | -| JSONCompact | ✔ | ✔ | -| JSONCompactStrings | ✗ | ✔ | -| JSONCompactColumns | ✔ | ✔ | -| JSONEachRow | ✔ | ✔ | -| PrettyJSONEachRow | ✗ | ✔ | -| JSONEachRowWithProgress | ✗ | ✔ | -| JSONStringsEachRow | ✔ | ✔ | -| JSONStringsEachRowWithProgress | ✗ | ✔ | -| JSONCompactEachRow | ✔ | ✔ | -| JSONCompactEachRowWithNames | ✔ | ✔ | -| JSONCompactEachRowWithNamesAndTypes | ✔ | ✔ | -| JSONCompactStringsEachRow | ✔ | ✔ | -| JSONCompactStringsEachRowWithNames | ✔ | ✔ | + + +| Name | Input | Output | +| --- | --- | --- | +| Arrow | ✔ | ✔ | +| ArrowStream | ✔ | ✔ | +| Avro | ✔ | ✔ | +| AvroConfluent | ✔ | ✗ | +| BSONEachRow | ✔ | ✔ | +| CSV | ✔ | ✔ | +| CSVWithNames | ✔ | ✔ | +| CSVWithNamesAndTypes | ✔ | ✔ | +| CapnProto | ✔ | ✔ | +| CustomSeparated | ✔ | ✔ | +| CustomSeparatedIgnoreSpaces | ✔ | ✗ | +| CustomSeparatedIgnoreSpacesWithNames | ✔ | ✗ | +| CustomSeparatedIgnoreSpacesWithNamesAndTypes | ✔ | ✗ | +| 
CustomSeparatedWithNames | ✔ | ✔ | +| CustomSeparatedWithNamesAndTypes | ✔ | ✔ | +| DWARF | ✔ | ✗ | +| Form | ✔ | ✗ | +| HiveText | ✔ | ✗ | +| JSON | ✔ | ✔ | +| JSONAsObject | ✔ | ✗ | +| JSONAsString | ✔ | ✗ | +| JSONColumns | ✔ | ✔ | +| JSONColumnsWithMetadata | ✔ | ✔ | +| JSONCompact | ✔ | ✔ | +| JSONCompactColumns | ✔ | ✔ | +| JSONCompactEachRow | ✔ | ✔ | +| JSONCompactEachRowWithNames | ✔ | ✔ | +| JSONCompactEachRowWithNamesAndTypes | ✔ | ✔ | +| JSONCompactStrings | ✗ | ✔ | +| JSONCompactStringsEachRow | ✔ | ✔ | +| JSONCompactStringsEachRowWithNames | ✔ | ✔ | | JSONCompactStringsEachRowWithNamesAndTypes | ✔ | ✔ | -| JSONObjectEachRow | ✔ | ✔ | -| BSONEachRow | ✔ | ✔ | -| TSKV | ✔ | ✔ | -| Pretty | ✗ | ✔ | -| PrettyNoEscapes | ✗ | ✔ | -| PrettyMonoBlock | ✗ | ✔ | -| PrettyNoEscapesMonoBlock | ✗ | ✔ | -| PrettyCompact | ✗ | ✔ | -| PrettyCompactNoEscapes | ✗ | ✔ | -| PrettyCompactMonoBlock | ✗ | ✔ | -| PrettyCompactNoEscapesMonoBlock | ✗ | ✔ | -| PrettySpace | ✗ | ✔ | -| PrettySpaceNoEscapes | ✗ | ✔ | -| PrettySpaceMonoBlock | ✗ | ✔ | -| PrettySpaceNoEscapesMonoBlock | ✗ | ✔ | -| Prometheus | ✗ | ✔ | -| Protobuf | ✔ | ✔ | -| ProtobufSingle | ✔ | ✔ | -| Avro | ✔ | ✔ | -| AvroConfluent | ✔ | ✗ | -| Parquet | ✔ | ✔ | -| ParquetMetadata | ✔ | ✗ | -| Arrow | ✔ | ✔ | -| ArrowStream | ✔ | ✔ | -| ORC | ✔ | ✔ | -| One | ✔ | ✗ | -| RowBinary | ✔ | ✔ | -| RowBinaryWithNames | ✔ | ✔ | -| RowBinaryWithNamesAndTypes | ✔ | ✔ | -| RowBinaryWithDefaults | ✔ | ✔ | -| Native | ✔ | ✔ | -| Null | ✗ | ✔ | -| XML | ✗ | ✔ | -| CapnProto | ✔ | ✔ | -| LineAsString | ✔ | ✔ | -| Regexp | ✔ | ✗ | -| RawBLOB | ✔ | ✔ | -| MsgPack | ✔ | ✔ | -| MySQLDump | ✔ | ✗ | -| Markdown | ✗ | ✔ | +| JSONEachRow | ✔ | ✔ | +| JSONEachRowWithProgress | ✗ | ✔ | +| JSONLines | ✔ | ✔ | +| JSONObjectEachRow | ✔ | ✔ | +| JSONStrings | ✗ | ✔ | +| JSONStringsEachRow | ✔ | ✔ | +| JSONStringsEachRowWithProgress | ✗ | ✔ | +| LineAsString | ✔ | ✔ | +| LineAsStringWithNames | ✗ | ✔ | +| LineAsStringWithNamesAndTypes | ✗ | 
✔ | +| Markdown | ✗ | ✔ | +| MsgPack | ✔ | ✔ | +| MySQLDump | ✔ | ✗ | +| MySQLWire | ✗ | ✔ | +| NDJSON | ✔ | ✔ | +| Native | ✔ | ✔ | +| Npy | ✔ | ✔ | +| Null | ✗ | ✔ | +| ODBCDriver2 | ✗ | ✔ | +| ORC | ✔ | ✔ | +| One | ✔ | ✗ | +| Parquet | ✔ | ✔ | +| ParquetMetadata | ✔ | ✗ | +| PostgreSQLWire | ✗ | ✔ | +| Pretty | ✗ | ✔ | +| PrettyCompact | ✗ | ✔ | +| PrettyCompactMonoBlock | ✗ | ✔ | +| PrettyCompactNoEscapes | ✗ | ✔ | +| PrettyCompactNoEscapesMonoBlock | ✗ | ✔ | +| PrettyJSONEachRow | ✗ | ✔ | +| PrettyJSONLines | ✗ | ✔ | +| PrettyMonoBlock | ✗ | ✔ | +| PrettyNDJSON | ✗ | ✔ | +| PrettyNoEscapes | ✗ | ✔ | +| PrettyNoEscapesMonoBlock | ✗ | ✔ | +| PrettySpace | ✗ | ✔ | +| PrettySpaceMonoBlock | ✗ | ✔ | +| PrettySpaceNoEscapes | ✗ | ✔ | +| PrettySpaceNoEscapesMonoBlock | ✗ | ✔ | +| Prometheus | ✗ | ✔ | +| Protobuf | ✔ | ✔ | +| ProtobufList | ✔ | ✔ | +| ProtobufSingle | ✔ | ✔ | +| Raw | ✔ | ✔ | +| RawBLOB | ✔ | ✔ | +| RawWithNames | ✔ | ✔ | +| RawWithNamesAndTypes | ✔ | ✔ | +| Regexp | ✔ | ✗ | +| RowBinary | ✔ | ✔ | +| RowBinaryWithDefaults | ✔ | ✗ | +| RowBinaryWithNames | ✔ | ✔ | +| RowBinaryWithNamesAndTypes | ✔ | ✔ | +| SQLInsert | ✗ | ✔ | +| TSKV | ✔ | ✔ | +| TSV | ✔ | ✔ | +| TSVRaw | ✔ | ✔ | +| TSVRawWithNames | ✔ | ✔ | +| TSVRawWithNamesAndTypes | ✔ | ✔ | +| TSVWithNames | ✔ | ✔ | +| TSVWithNamesAndTypes | ✔ | ✔ | +| TabSeparated | ✔ | ✔ | +| TabSeparatedRaw | ✔ | ✔ | +| TabSeparatedRawWithNames | ✔ | ✔ | +| TabSeparatedRawWithNamesAndTypes | ✔ | ✔ | +| TabSeparatedWithNames | ✔ | ✔ | +| TabSeparatedWithNamesAndTypes | ✔ | ✔ | +| Template | ✔ | ✔ | +| TemplateIgnoreSpaces | ✔ | ✗ | +| Values | ✔ | ✔ | +| Vertical | ✗ | ✔ | +| XML | ✗ | ✔ | + For further information and examples, see [ClickHouse formats for input and output data](/docs/en/interfaces/formats). 
\ No newline at end of file
diff --git a/package.json b/package.json
index 9b9e2152f46..4263a260861 100644
--- a/package.json
+++ b/package.json
@@ -19,9 +19,11 @@
     "new-build": "bash ./copyClickhouseRepoDocs.sh && bash ./scripts/settings/autogenerate-settings.sh && yarn build-api-doc && yarn build && yarn build-swagger",
     "start": "docusaurus start",
     "swizzle": "docusaurus swizzle",
-    "write-heading-ids": "docusaurus write-heading-ids"
+    "write-heading-ids": "docusaurus write-heading-ids",
+    "autogen_data_formats_table" : "node scripts/autogenerated-content/autogen_data_formats_tables.mjs"
   },
   "dependencies": {
+    "@clickhouse/client": "^1.10.0",
     "@docusaurus/core": "2.3.1",
     "@docusaurus/plugin-client-redirects": "2.3.1",
     "@docusaurus/preset-classic": "2.3.1",
diff --git a/scripts/autogenerated-content/autogen_data_formats_tables.mjs b/scripts/autogenerated-content/autogen_data_formats_tables.mjs
new file mode 100644
index 00000000000..78257e9cd35
--- /dev/null
+++ b/scripts/autogenerated-content/autogen_data_formats_tables.mjs
@@ -0,0 +1,37 @@
+import { createClient } from '@clickhouse/client'
+import {jsonToTable, insertTextBetweenTags} from './utilities.mjs';
+
+/*
+This script is used to automatically generate the tables of data formats found at:
+https://clickhouse.com/docs/en/interfaces/formats
+https://clickhouse.com/docs/en/chdb/data-formats
+ */
+
+const play_endpoint = 'https://play.clickhouse.com/';
+const client = createClient({
+  /* configuration */
+  url: play_endpoint,
+  username: 'explorer'
+})
+
+const resultSet = await client.query({
+  query: 'SELECT name AS Name, if(is_input, \'✔\', \'✗\') AS Input,' +
+    'if(is_output, \'✔\', \'✗\') AS Output ' +
+    'FROM system.formats ORDER BY name ASC'
+})
+const dataset = await resultSet.json()
+// Close the client now that the result has been fully read.
+await client.close()
+
+const data_formats_table = jsonToTable(dataset.data)
+// File paths are resolved against the current working directory, so run the
+// script from the repository root (as the package.json script entry does).
+const file_paths = ['docs/en/interfaces/formats.md', 'docs/en/chdb/data-formats.md']
+// NOTE(review): both markers are empty strings here; confirm the marker text
+// that the target files are expected to contain.
+const startTag = '';
+const endTag = '';
+
+file_paths.forEach((file_path) => {
+  insertTextBetweenTags(file_path, data_formats_table, startTag, endTag);
+})
diff --git a/scripts/autogenerated-content/utilities.mjs b/scripts/autogenerated-content/utilities.mjs
new file mode 100644
index 00000000000..dc7c5d76968
--- /dev/null
+++ b/scripts/autogenerated-content/utilities.mjs
@@ -0,0 +1,69 @@
+import * as fs from 'fs'
+
+/**
+ * Render an array of flat objects as a Markdown table.
+ *
+ * Column headers are the keys of the first element; every row is rendered
+ * with those same keys, in that order.
+ *
+ * @param {Object[]} jsonData - Rows to render.
+ * @returns {string} The table preceded and followed by a newline, or an
+ *   empty string when jsonData is not a non-empty array.
+ */
+const jsonToTable = (jsonData) => {
+  if (!Array.isArray(jsonData) || jsonData.length === 0) {
+    return "";
+  }
+
+  const headers = Object.keys(jsonData[0]);
+  const headerRow = `| ${headers.join(' | ')} |\n`;
+  const separatorRow = `| ${headers.map(() => '---').join(' | ')} |\n`;
+
+  // Use ?? rather than || so that falsy values such as 0 or false are still
+  // printed; only null/undefined cells are left empty.
+  const rows = jsonData.map(obj => `| ${headers.map(key => obj[key] ?? '').join(' | ')} |`);
+
+  return `\n${headerRow}${separatorRow}${rows.join('\n')}\n`;
+}
+
+/**
+ * Replace the text between startTag and endTag in a file with textToInsert.
+ *
+ * Both tags stay in the file. Missing tags and read/write errors are logged
+ * to the console rather than thrown to the caller.
+ *
+ * @param {string} filePath - File to rewrite in place.
+ * @param {string} textToInsert - Replacement for the text between the tags.
+ * @param {string} startTag - Marker after which the text is inserted.
+ * @param {string} endTag - Marker in front of which the inserted text ends.
+ */
+const insertTextBetweenTags = (filePath, textToInsert, startTag, endTag) => {
+  try {
+    const fileContent = fs.readFileSync(filePath, 'utf-8');
+
+    // Check the raw indexOf() result before adding the tag length: -1 plus the
+    // length is no longer -1, so a missing start tag would go unnoticed.
+    const startTagIndex = fileContent.indexOf(startTag);
+    const startIndex = startTagIndex + startTag.length;
+    // Only an end tag located after the start tag closes the region.
+    const endIndex = startTagIndex === -1 ? -1 : fileContent.indexOf(endTag, startIndex);
+
+    if (startTagIndex === -1 || endIndex === -1) {
+      console.error(`Error: Tags "${startTag}" or "${endTag}" not found in the file.`);
+      return;
+    }
+
+    const newContent =
+      fileContent.substring(0, startIndex) +
+      textToInsert +
+      fileContent.substring(endIndex);
+
+    fs.writeFileSync(filePath, newContent, 'utf-8');
+    console.log('Text inserted successfully.');
+
+  } catch (err) {
+    console.error(`Error: ${err}`);
+  }
+}
+
+export {jsonToTable, insertTextBetweenTags}
\ No newline at end of file