1 change: 1 addition & 0 deletions .env.example
@@ -4,6 +4,7 @@
OTA_ENGINE_GITHUB_TOKEN=your_github_token_here
OTA_ENGINE_GITLAB_TOKEN=your_gitlab_token_here
OTA_ENGINE_GITLAB_RELEASES_TOKEN=your_gitlab_releases_token_here
OTA_ENGINE_DATAGOUV_API_KEY=your_datagouv_api_key_here
OTA_ENGINE_SENDINBLUE_API_KEY=your_sendinblue_api_key_here
OTA_ENGINE_SMTP_PASSWORD=your_smtp_password_here

9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,15 @@

All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased [minor]

> Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs.

### Added

- Add support for publishing datasets to data.gouv.fr; configure `dataset.datagouv.datasetId` in the configuration file and set the `OTA_ENGINE_DATAGOUV_API_KEY` environment variable (see the configuration sketch below)
- Add ability to publish datasets to multiple platforms simultaneously; datasets can now be published to GitHub (or GitLab) and data.gouv.fr in parallel
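
A minimal configuration sketch for the new data.gouv.fr target, to be combined with `OTA_ENGINE_DATAGOUV_API_KEY` in the environment; the `datasetId` value below is a placeholder and `useDemo` switches publication to demo.data.gouv.fr instead of production:

```json
{
  "@opentermsarchive/engine": {
    "dataset": {
      "datagouv": {
        "datasetId": "<your data.gouv.fr dataset ID>",
        "useDemo": false
      }
    }
  }
}
```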

## 9.2.0 - 2025-11-05

_Full changeset and discussions: [#1173](https://github.com/OpenTermsArchive/engine/pull/1173)._
4 changes: 2 additions & 2 deletions bin/ota-dataset.js
@@ -11,9 +11,9 @@ import logger from '../src/logger/index.js';

program
.name('ota dataset')
.description('Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
.description('Export the versions dataset into a ZIP file and optionally publish it to GitHub releases, GitLab releases, or data.gouv.fr')
.option('-f, --file <filename>', 'file name of the generated dataset')
.option('-p, --publish', 'publish dataset to GitHub releases on versions repository. Mandatory authentication to GitHub is provided through the `OTA_ENGINE_GITHUB_TOKEN` environment variable')
.option('-p, --publish', 'publish the dataset to every configured platform: GitHub releases (`OTA_ENGINE_GITHUB_TOKEN`), GitLab releases (`OTA_ENGINE_GITLAB_TOKEN`) and data.gouv.fr (`OTA_ENGINE_DATAGOUV_API_KEY` environment variable plus `dataset.datagouv.datasetId` in the configuration file)')
.option('-r, --remove-local-copy', 'remove local copy of dataset after publishing. Works only in combination with --publish option')
.option('--schedule', 'schedule automatic dataset generation');
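// Example: with the relevant credentials exported in the environment,
// `ota dataset --publish --remove-local-copy` generates the dataset archive,
// publishes it to every configured platform and then removes the local copy.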

9 changes: 7 additions & 2 deletions scripts/dataset/index.js
@@ -24,13 +24,18 @@ export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }

logger.info('Start publishing dataset…');

const releaseUrl = await publishRelease({
const results = await publishRelease({
archivePath,
releaseDate,
stats,
});
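// Each entry in results has the shape { platform, url } for a platform that published successfully.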

logger.info(`Dataset published to ${releaseUrl}`);
if (results.length > 0) {
logger.info('Dataset published to the following platforms:');
results.forEach(result => {
logger.info(` - ${result.platform}: ${result.url}`);
});
}

if (!shouldRemoveLocalCopy) {
return;
14 changes: 13 additions & 1 deletion scripts/dataset/logger/index.js
@@ -13,7 +13,19 @@ logger.format = combine(

const timestampPrefix = config.get('@opentermsarchive/engine.logger.timestampPrefix') ? `${timestamp} ` : '';

return `${timestampPrefix}${level.padEnd(15)} ${prefix.padEnd(50)} ${message}`;
const levelStr = level.padEnd(15);
let coloredLevel = levelStr;
let coloredMessage = message;

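// Raw ANSI escape sequences: 33 = yellow for warnings, 31 = red for errors, 0 resets the color.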
if (level.includes('warn')) {
coloredLevel = `\x1b[33m${levelStr}\x1b[0m`;
coloredMessage = `\x1b[33m${message}\x1b[0m`;
} else if (level.includes('error')) {
coloredLevel = `\x1b[31m${levelStr}\x1b[0m`;
coloredMessage = `\x1b[31m${message}\x1b[0m`;
}

return `${timestampPrefix}${coloredLevel} ${prefix.padEnd(50)} ${coloredMessage}`;
}),
);

115 changes: 115 additions & 0 deletions scripts/dataset/publish/datagouv/dataset.js
@@ -0,0 +1,115 @@
import fsApi from 'fs';
import path from 'path';

import FormData from 'form-data';
import nodeFetch from 'node-fetch';

import * as readme from '../../assets/README.template.js';
import logger from '../../logger/index.js';

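// These helpers wrap the data.gouv.fr API (v1) calls used for a release:
// update the dataset metadata (PUT /datasets/{id}/), upload the ZIP archive as
// a resource (POST /datasets/{id}/upload/), describe that resource
// (PUT /datasets/{id}/resources/{rid}/) and read back the public dataset page
// URL (GET /datasets/{id}/).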
const DATASET_LICENSE = 'odc-odbl';
const DEFAULT_RESOURCE_DESCRIPTION = 'See README.md inside the archive for dataset structure and usage information.';

export async function updateDatasetMetadata({ apiBaseUrl, headers, datasetId, releaseDate, stats }) {
const updatePayload = {
title: readme.title({ releaseDate }),
description: readme.body(stats),
license: DATASET_LICENSE,
};

if (stats?.firstVersionDate && stats?.lastVersionDate) {
updatePayload.temporal_coverage = {
start: stats.firstVersionDate.toISOString(),
end: stats.lastVersionDate.toISOString(),
};
}

const updateResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/`, {
method: 'PUT',
headers: {
...headers,
'Content-Type': 'application/json',
},
body: JSON.stringify(updatePayload),
});

if (!updateResponse.ok) {
const errorText = await updateResponse.text();

throw new Error(`Failed to update dataset metadata: ${updateResponse.status} ${updateResponse.statusText} - ${errorText}`);
}
}

export async function uploadResource({ apiBaseUrl, headers, datasetId, archivePath }) {
logger.info('Uploading dataset archive…');

const formData = new FormData();
const fileName = path.basename(archivePath);
const fileStats = fsApi.statSync(archivePath);

formData.append('file', fsApi.createReadStream(archivePath), {
filename: fileName,
contentType: 'application/zip',
knownLength: fileStats.size,
});

const uploadResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/upload/`, {
method: 'POST',
headers: { ...formData.getHeaders(), ...headers },
body: formData,
});

if (!uploadResponse.ok) {
const errorText = await uploadResponse.text();

throw new Error(`Failed to upload dataset file: ${uploadResponse.status} ${uploadResponse.statusText} - ${errorText}`);
}

const uploadResult = await uploadResponse.json();

logger.info(`Dataset file uploaded successfully with resource ID: ${uploadResult.id}`);

return { resourceId: uploadResult.id, fileName };
}

export async function updateResourceMetadata({ apiBaseUrl, headers, datasetId, resourceId, fileName }) {
logger.info('Updating resource metadata…');

const resourceUpdateResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/resources/${resourceId}/`, {
method: 'PUT',
headers: { ...headers, 'Content-Type': 'application/json' },
body: JSON.stringify({
title: fileName,
description: DEFAULT_RESOURCE_DESCRIPTION,
filetype: 'file',
format: 'zip',
mime: 'application/zip',
}),
});

if (!resourceUpdateResponse.ok) {
const errorText = await resourceUpdateResponse.text();

throw new Error(`Failed to update resource metadata: ${resourceUpdateResponse.status} ${resourceUpdateResponse.statusText} - ${errorText}`);
}

logger.info('Resource metadata updated successfully');
}

export async function getDatasetUrl({ apiBaseUrl, headers, datasetId }) {
const datasetResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/`, {
method: 'GET',
headers: { ...headers },
});

if (!datasetResponse.ok) {
const errorText = await datasetResponse.text();

throw new Error(`Failed to retrieve dataset URL: ${datasetResponse.status} ${datasetResponse.statusText} - ${errorText}`);
}

const datasetData = await datasetResponse.json();
const datasetUrl = datasetData.page;

return datasetUrl;
}
47 changes: 47 additions & 0 deletions scripts/dataset/publish/datagouv/index.js
@@ -0,0 +1,47 @@
import config from 'config';

import logger from '../../logger/index.js';

import { updateDatasetMetadata, uploadResource, updateResourceMetadata, getDatasetUrl } from './dataset.js';

const PRODUCTION_API_BASE_URL = 'https://www.data.gouv.fr/api/1';
const DEMO_API_BASE_URL = 'https://demo.data.gouv.fr/api/1';

function loadConfiguration() {
const apiKey = process.env.OTA_ENGINE_DATAGOUV_API_KEY;

if (!apiKey) {
throw new Error('OTA_ENGINE_DATAGOUV_API_KEY environment variable is required for data.gouv.fr publishing');
}

const datasetId = config.get('@opentermsarchive/engine.dataset.datagouv.datasetId');

if (!datasetId) {
throw new Error('datasetId is required in config at @opentermsarchive/engine.dataset.datagouv.datasetId. Run "node scripts/dataset/publish/datagouv/create-dataset.js" to create a dataset first.');
}

const useDemo = config.get('@opentermsarchive/engine.dataset.datagouv.useDemo');
const apiBaseUrl = useDemo ? DEMO_API_BASE_URL : PRODUCTION_API_BASE_URL;

if (useDemo) {
logger.warn('Using demo.data.gouv.fr environment for testing');
}

const headers = { 'X-API-KEY': apiKey };

return { datasetId, apiBaseUrl, headers };
}

export default async function publish({ archivePath, releaseDate, stats }) {
const publishConfig = loadConfiguration();

await updateDatasetMetadata({ ...publishConfig, releaseDate, stats });

const { resourceId, fileName } = await uploadResource({ ...publishConfig, archivePath });

await updateResourceMetadata({ ...publishConfig, resourceId, fileName });

const datasetUrl = await getDatasetUrl({ ...publishConfig });

return datasetUrl;
}
44 changes: 39 additions & 5 deletions scripts/dataset/publish/index.js
@@ -1,15 +1,49 @@
import config from 'config';

import logger from '../logger/index.js';

import publishDataGouv from './datagouv/index.js';
import publishGitHub from './github/index.js';
import publishGitLab from './gitlab/index.js';

export default function publishRelease({ archivePath, releaseDate, stats }) {
export default async function publishRelease({ archivePath, releaseDate, stats }) {
const platforms = [];

// If both GitHub and GitLab tokens are defined, GitHub takes precedence
if (process.env.OTA_ENGINE_GITHUB_TOKEN) {
return publishGitHub({ archivePath, releaseDate, stats });
platforms.push({ name: 'GitHub', publish: () => publishGitHub({ archivePath, releaseDate, stats }) });
} else if (process.env.OTA_ENGINE_GITLAB_TOKEN) {
platforms.push({ name: 'GitLab', publish: () => publishGitLab({ archivePath, releaseDate, stats }) });
}

if (process.env.OTA_ENGINE_DATAGOUV_API_KEY && config.get('@opentermsarchive/engine.dataset.datagouv.datasetId')) {
platforms.push({ name: 'data.gouv.fr', publish: () => publishDataGouv({ archivePath, releaseDate, stats }) });
}

if (!platforms.length) {
throw new Error('No publishing platform configured. Please configure at least one of: GitHub (OTA_ENGINE_GITHUB_TOKEN), GitLab (OTA_ENGINE_GITLAB_TOKEN), or data.gouv.fr (OTA_ENGINE_DATAGOUV_API_KEY + datasetId in config).');
}

if (process.env.OTA_ENGINE_GITLAB_TOKEN) {
return publishGitLab({ archivePath, releaseDate, stats });
const results = await Promise.allSettled(platforms.map(async platform => {
const url = await platform.publish();

return { platform: platform.name, url };
}));
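// Promise.allSettled runs every platform to completion, so one failing
// platform does not block the others; failures are aggregated and logged below.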

const succeeded = results.filter(result => result.status === 'fulfilled');
const failed = results.filter(result => result.status === 'rejected');

if (failed.length) {
let errorMessage = !succeeded.length ? 'All platforms failed to publish:' : 'Some platforms failed to publish:';

failed.forEach(rejectedResult => {
const index = results.indexOf(rejectedResult);

errorMessage += `\n - ${platforms[index].name}: ${rejectedResult.reason.message}`;
});

logger.error(errorMessage);
}

throw new Error('No GitHub nor GitLab token found in environment variables (OTA_ENGINE_GITHUB_TOKEN or OTA_ENGINE_GITLAB_TOKEN). Cannot publish the dataset without authentication.');
return succeeded.map(result => result.value);
}
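
Every target follows the same contract in `scripts/dataset/publish/index.js`: a guard on its credentials pushes an entry whose `publish` function resolves with the public URL of the release. A hedged sketch of how an additional platform could be registered inside `publishRelease`, where `publishExample`, its module path and `OTA_ENGINE_EXAMPLE_TOKEN` are hypothetical names:

```js
import publishExample from './example/index.js'; // hypothetical publisher module

// Inside publishRelease, alongside the existing guards:
if (process.env.OTA_ENGINE_EXAMPLE_TOKEN) {
  platforms.push({
    name: 'Example platform',
    // Like the GitHub, GitLab and data.gouv.fr publishers, this must
    // resolve with the public URL of the published dataset.
    publish: () => publishExample({ archivePath, releaseDate, stats }),
  });
}
```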