Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial version of platform-independent CDS extractor #169

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions extractors/cds/tools/autobuild.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@echo off

REM Autobuild entry point of the CDS extractor for Windows.
REM Runs 'codeql database index-files' for the 'cds' language against the
REM database named by CODEQL_EXTRACTOR_CDS_WIP_DATABASE, including files with
REM the .cds extension, pruning node_modules and .eslint directories, and
REM passing a total size limit of 10m.
REM NOTE(review): 'type NUL' prints nothing; presumably it is there to reset
REM ERRORLEVEL to 0 before codeql.exe is started - confirm.
type NUL && "%CODEQL_DIST%\codeql.exe" database index-files ^
--include-extension=.cds ^
--language cds ^
--prune **\node_modules\**\* ^
--prune **\.eslint\**\* ^
--total-size-limit=10m ^
-- ^
"%CODEQL_EXTRACTOR_CDS_WIP_DATABASE%"

REM Hand the exit code of the command chain above back to the caller.
exit /b %ERRORLEVEL%
15 changes: 5 additions & 10 deletions extractors/cds/tools/autobuild.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
#!/usr/bin/env bash

# Autobuild entry point of the CDS extractor for POSIX platforms.
# Replaces this process (exec) with 'codeql database index-files' for the
# 'cds' language, run against the database named by
# CODEQL_EXTRACTOR_CDS_WIP_DATABASE.

# Abort on the first failing command and on use of an unset variable.
set -eu

# The --prune patterns are single-quoted so that the shell hands them to the
# CodeQL CLI verbatim instead of expanding them against files that happen to
# exist below the current working directory.
exec "${CODEQL_DIST}/codeql" database index-files \
    --include-extension=.cds \
    --language cds \
    --prune '**/node_modules/**/*' \
    --prune '**/.eslint/**/*' \
    --total-size-limit=10m \
    -- \
    "$CODEQL_EXTRACTOR_CDS_WIP_DATABASE"
54 changes: 54 additions & 0 deletions extractors/cds/tools/index-files.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
@echo off

REM Windows wrapper around the 'index-files.js' script of the CDS extractor.
REM
REM Usage: index-files.cmd ^<response_file_path^>
REM
REM Exit codes produced by this wrapper itself:
REM   1 - the response file argument is missing
REM   2 - the node executable was not found in PATH
REM   3 - the npm executable was not found in PATH
REM   0 - the response file is missing or empty, so there is nothing to index
REM Otherwise the exit code of the final command chain is returned.

if "%~1"=="" (
    echo Usage: %0 ^<response_file_path^>
    exit /b 1
)

REM Parentheses inside the messages below are escaped with a caret because an
REM unescaped closing parenthesis would end the surrounding IF block early.
where node >nul 2>nul
if %ERRORLEVEL% neq 0 (
    echo node executable is required ^(in PATH^) to run the 'index-files.js' script. Please install Node.js and try again.
    exit /b 2
)

where npm >nul 2>nul
if %ERRORLEVEL% neq 0 (
    echo npm executable is required ^(in PATH^) to install the dependencies for the 'index-files.js' script.
    exit /b 3
)

set "_response_file_path=%~1"
set "_script_dir=%~dp0"
REM Set _cwd before changing the working directory to the script directory.
set "_cwd=%CD%"

echo Checking response file for CDS files to index

REM Terminate early if the _response_file_path doesn't exist or is empty,
REM which indicates that no CDS files were selected or found. The size stays
REM at 0 when the file does not exist, so both cases share one check.
set "_response_file_size=0"
if exist "%_response_file_path%" for %%F in ("%_response_file_path%") do set "_response_file_size=%%~zF"
if "%_response_file_size%"=="0" (
    echo 'codeql database index-files --language cds' command terminated early as response file '%_response_file_path%' does not exist or is empty. This is because no CDS files were selected or found.
    exit /b 0
)

REM Change to the directory of this script to ensure that npm looks up the
REM package.json file in the correct directory and installs the dependencies
REM, i.e. node_modules, relative to this directory. This is technically a
REM violation of the assumption that extractor scripts will be run with the
REM current working directory set to the root of the project source, but we
REM also need node_modules to be installed here and not in the project source
REM root, so we make a compromise of:
REM   1. changing to this script's directory;
REM   2. installing node dependencies here;
REM   3. passing the original working directory as a parameter to the
REM      index-files.js script;
REM   4. expecting the index-files.js script to immediately change back to
REM      the original working, aka the project source root, directory.
REM
REM npm is a batch script on Windows, so it is started with CALL to make sure
REM control returns to this script once npm has finished.

cd /d "%_script_dir%" && ^
echo Installing node package dependencies && ^
call npm install --quiet --no-audit --no-fund && ^
echo Running the 'index-files.js' script && ^
node "%_script_dir%index-files.js" "%_response_file_path%" "%_cwd%"

exit /b %ERRORLEVEL%
302 changes: 302 additions & 0 deletions extractors/cds/tools/index-files.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
const { execFileSync, spawnSync } = require('child_process');
const { existsSync, readFileSync, statSync, writeFileSync } = require('fs');
const { arch, platform } = require('os');
const { basename, dirname, join, resolve } = require('path');
const { quote } = require('shell-quote');

// Terminate early (with a warning and exit code 0) if this script is not
// invoked with exactly the two required arguments.
if (process.argv.length !== 4) {
  console.warn(`Usage: node index-files.js <response-file> <source-root>`);
  process.exit(0);
}

// Path of the file listing the CDS files to index, one path per line.
const responseFile = process.argv[2];
// Project (source) root directory that was the working directory of the caller.
const sourceRoot = process.argv[3];

const osPlatform = platform();
const osPlatformArch = arch();
console.log(`Detected OS platform=${osPlatform} : arch=${osPlatformArch}`);
const codeqlExe = osPlatform === 'win32' ? 'codeql.exe' : 'codeql';

// Verify the source root BEFORE changing into it: process.chdir() throws for
// a missing directory, which would make this early exit unreachable.
if (!existsSync(sourceRoot)) {
  console.warn(`'${codeqlExe} database index-files --language cds' terminated early due to internal error: could not find project root directory '${sourceRoot}'.`);
  process.exit(0);
}

// Force this script, and any process it spawns, to use the project (source)
// root directory as the current working directory.
process.chdir(sourceRoot);

console.log(`Indexing CDS files in project source directory: ${sourceRoot}`);

// CODEQL_DIST is used as a plain file system path (existsSync, execFileSync),
// never as part of a shell command line, so it must not be shell-quoted:
// quoting would add quote or escape characters to paths that contain spaces
// or backslashes and make the path point nowhere.
const codeqlExePath = resolve(join(process.env.CODEQL_DIST || '', codeqlExe));

// Root directory of the JavaScript extractor. The value is used as a file
// system path only, so it is taken verbatim from the environment (no shell
// quoting, which would corrupt paths containing spaces or backslashes).
let CODEQL_EXTRACTOR_JAVASCRIPT_ROOT = process.env.CODEQL_EXTRACTOR_JAVASCRIPT_ROOT || undefined;
// Check if the (JavaScript) JS extractor variables are set, and set them if not.
if (!CODEQL_EXTRACTOR_JAVASCRIPT_ROOT) {
  // Find the JS extractor location by asking the CodeQL CLI.
  try {
    CODEQL_EXTRACTOR_JAVASCRIPT_ROOT = execFileSync(
      codeqlExePath,
      ['resolve', 'extractor', '--language=javascript']
    ).toString().trim();
  } catch (err) {
    // A missing or failing codeql executable ends the run in the same way as
    // the other precondition failures of this script instead of crashing.
    console.warn(
      `'${codeqlExe} database index-files --language cds' terminated early as the JavaScript extractor could not be resolved via '${codeqlExePath}': ${err}`
    );
    process.exit(0);
  }
  // Terminate early if the CODEQL_EXTRACTOR_JAVASCRIPT_ROOT environment
  // variable was not already set and could not be resolved via CLI.
  if (!CODEQL_EXTRACTOR_JAVASCRIPT_ROOT) {
    console.warn(
      `'${codeqlExe} database index-files --language cds' terminated early as CODEQL_EXTRACTOR_JAVASCRIPT_ROOT environment variable is not set.`
    );
    process.exit(0);
  }
  process.env.CODEQL_EXTRACTOR_JAVASCRIPT_ROOT = CODEQL_EXTRACTOR_JAVASCRIPT_ROOT;
  // Set the JAVASCRIPT extractor environment variables to the same as the CDS
  // extractor environment variables so that the JS extractor will write to the
  // CDS database.
  process.env.CODEQL_EXTRACTOR_JAVASCRIPT_WIP_DATABASE = process.env.CODEQL_EXTRACTOR_CDS_WIP_DATABASE;
  process.env.CODEQL_EXTRACTOR_JAVASCRIPT_DIAGNOSTIC_DIR = process.env.CODEQL_EXTRACTOR_CDS_DIAGNOSTIC_DIR;
  process.env.CODEQL_EXTRACTOR_JAVASCRIPT_LOG_DIR = process.env.CODEQL_EXTRACTOR_CDS_LOG_DIR;
  process.env.CODEQL_EXTRACTOR_JAVASCRIPT_SCRATCH_DIR = process.env.CODEQL_EXTRACTOR_CDS_SCRATCH_DIR;
  process.env.CODEQL_EXTRACTOR_JAVASCRIPT_TRAP_DIR = process.env.CODEQL_EXTRACTOR_CDS_TRAP_DIR;
  process.env.CODEQL_EXTRACTOR_JAVASCRIPT_SOURCE_ARCHIVE_DIR = process.env.CODEQL_EXTRACTOR_CDS_SOURCE_ARCHIVE_DIR;
}

// Name and absolute path of the autobuild script shipped in the 'tools'
// directory of the javascript extractor.
const autobuildScriptName = osPlatform === 'win32' ? 'autobuild.cmd' : 'autobuild.sh';
const autobuildScriptPath = resolve(
  join(CODEQL_EXTRACTOR_JAVASCRIPT_ROOT, 'tools', autobuildScriptName)
);

/**
 * Paths that must exist before any work is done, checked in this order, each
 * paired with the reason reported when it is missing:
 * - the javascript extractor autobuild script;
 * - the codeql executable;
 * - the input responseFile.
 * The first missing path ends the process with a warning and exit code 0.
 */
const requiredPaths = [
  [autobuildScriptPath, `autobuild script '${autobuildScriptPath}' does not exist.`],
  [codeqlExePath, `codeql executable '${codeqlExePath}' does not exist.`],
  [responseFile, `response file '${responseFile}' does not exist. This is because no CDS files were selected or found.`],
];
for (const [requiredPath, reason] of requiredPaths) {
  if (existsSync(requiredPath)) {
    continue;
  }
  console.warn(`'${codeqlExe} database index-files --language cds' terminated early as ${reason}`);
  process.exit(0);
}

// Paths of the CDS files to process, as listed in the response file.
let responseFiles = [];
try {
  // Read the response file and split it into lines, accepting both LF and
  // CRLF line endings so that no path keeps a trailing carriage return, and
  // removing (filter(Boolean)) empty lines.
  responseFiles = readFileSync(responseFile, 'utf-8').split(/\r?\n/).filter(Boolean);
  // An empty file yields an empty list, so this one check covers both cases.
  if (responseFiles.length === 0) {
    console.warn(`'${codeqlExe} database index-files --language cds' terminated early as response file '${responseFile}' is empty. This is because no CDS files were selected or found.`);
    process.exit(0);
  }
} catch (err) {
  console.warn(`'${codeqlExe} database index-files --language cds' terminated early as response file '${responseFile}' could not be read due to an error: ${err}`);
  process.exit(0);
}

// Determine if we have the cds commands available. If not, install the cds development kit
// (cds-dk) in the appropriate directories and use npx to run the cds command from there.
let cdsCommand = 'cds';
// On Windows the `cds` and `npm` commands are .cmd files, which execFileSync
// can only start through a shell. All arguments passed below are fixed strings.
const needsShellForCmdFiles = process.platform === 'win32';
try {
  execFileSync('cds', ['--version'], { shell: needsShellForCmdFiles, stdio: 'ignore' });
} catch {
  console.log('Pre-installing cds compiler ...');

  // Use a JS `Set` to avoid duplicate processing of the same directory.
  const packageJsonDirs = new Set();
  /**
   * For each file listed in the response file (e.g. the cds files we want to
   * extract), walk up from the directory of the file and record the nearest
   * directory whose package.json has a `name` and a dependency on `@sap/cds`.
   *
   * We then install the CDS development kit (`@sap/cds-dk`) in each recorded
   * directory, which makes the `cds` command usable from the npx command
   * within that directory.
   *
   * The file paths are used as plain file system paths here, so they are not
   * shell-quoted. A package.json that cannot be read or parsed is reported
   * and skipped, and the walk continues with the parent directory.
   *
   * NOTE: The original (sh-based) implementation of this extractor would also capture
   * "grandfathered" package.json files, which are package.json files that exist in a
   * parent directory of the first package.json file found. This (js-based) implementation
   * removes this behavior as it seems unnecessary and potentially problematic.
   *
   * NOTE(review): nothing in this loop skips node_modules directories
   * explicitly; presumably such files never reach the response file - confirm.
   */
  responseFiles.forEach(file => {
    let dir = dirname(file);
    // `resolve(dir, '..')` equals `dir` only at the file system root.
    while (dir !== resolve(dir, '..')) {
      const packageJsonPath = join(dir, 'package.json');
      if (existsSync(packageJsonPath)) {
        let packageJsonData = null;
        try {
          packageJsonData = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
        } catch (err) {
          console.warn(`WARN: skipping package.json file '${packageJsonPath}' as it could not be read or parsed: ${err}`);
        }
        // Check if the 'name' and 'dependencies' properties are present in the
        // package.json file at packageJsonPath.
        if (
          packageJsonData &&
          packageJsonData.name &&
          packageJsonData.dependencies &&
          typeof packageJsonData.dependencies === 'object'
        ) {
          const dependencyNames = Object.keys(packageJsonData.dependencies);
          if (dependencyNames.includes('@sap/cds')) {
            packageJsonDirs.add(dir);
            break;
          }
        }
      }
      // Move up one directory level and try again to find a package.json file
      // for the response file.
      dir = resolve(dir, '..');
    }
  });

  // Sanity check that we found at least one package.json directory from which the CDS
  // compiler dependencies may be installed.
  if (packageJsonDirs.size === 0) {
    console.warn('WARN: failed to detect any package.json directories for cds compiler installation.');
    // `exit` is not a global in Node.js; the process object provides it.
    process.exit(0);
  }

  packageJsonDirs.forEach((dir) => {
    console.log(`Installing '@sap/cds-dk' into ${dir} to enable CDS compilation ...`);
    execFileSync(
      'npm',
      ['install', '--quiet', '--no-audit', '--no-fund', '--no-save', '@sap/cds-dk'],
      { cwd: dir, shell: needsShellForCmdFiles, stdio: 'inherit' }
    );
    console.log(`Installing node packages into ${dir} to enable CDS compilation ...`);
    execFileSync(
      'npm',
      ['install', '--quiet', '--no-audit', '--no-fund'],
      { cwd: dir, shell: needsShellForCmdFiles, stdio: 'inherit' }
    );
  });

  /**
   * Use the `npx` command to dynamically install the CDS development kit (`@sap/cds-dk`)
   * package if necessary, which then provides the `cds` command line tool in directories
   * which are not covered by the package.json install command approach above.
   */
  cdsCommand = 'npx -y --package @sap/cds-dk cds';
}

console.log('Processing CDS files to JSON ...');

/**
* Run the cds compile command on each file in the response files list, outputting the
* compiled JSON to a file with the same name but with a .json extension appended.
*/
responseFiles.forEach(rawCdsFilePath => {
const cdsFilePath = quote([rawCdsFilePath]);
const cdsJsonFilePath = `${cdsFilePath}.json`;
console.log(`Processing CDS file ${cdsFilePath} to ${cdsJsonFilePath} ...`);
const result = spawnSync(
cdsCommand,
[
'compile', cdsFilePath,
'-2', 'json',
'--locations',
'--log-level', 'warn'
],
{ cwd: dirname(cdsFilePath), shell: true, stdio: 'pipe' }
);
if (result.error || result.status !== 0 || !result.stdout) {
const errorMessage = `Could not compile the file ${cdsFilePath}.\nReported error(s):\n\`\`\`\n${result.stderr.toString()}\n\`\`\``;
console.log(errorMessage);
try {
execFileSync(
codeqlExePath,
[
'database',
'add-diagnostic',
'--extractor-name=cds',
'--ready-for-status-page',
'--source-id=cds/compilation-failure',
'--source-name="Failure to compile one or more SAP CAP CDS files"',
'--severity=error',
`--markdown-message="${errorMessage}"`,
`--file-path="${cdsFilePath}"`,
'--',
`${process.env.CODEQL_EXTRACTOR_CDS_WIP_DATABASE}`
],
);
console.log(`Added error diagnostic for source file: ${cdsFilePath}`);
} catch (err) {
console.error(`Failed to add error diagnostic for source file=${cdsFilePath} : ${err}`);
}
}
// Write the compiled JSON result to cdsJsonFilePath.
writeFileSync(cdsJsonFilePath, result.stdout);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of interest, why do we no longer use the -o flag to the cds compiler?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to `cds compiler -h`, the `-o` (or `--dest`) option now "Writes output to the given folder instead of stdout."
This seems like a change in behavior and it is causing problems for our existing (sh-based) extractor.

For an example CDS file called `service1.cds`, using `-2 json -o service1.cds.json` will actually create `service1.cds.json/service1.json`. This path mismatch causes many SARIF diffs.

I probably need to submit a fix for the sh-based extractor, simply because I need a working reference to compare against.

});

/**
 * User-supplied "exclude" entries to append to the index filters, either
 * empty or starting with a newline.
 *
 * LGTM_INDEX_FILTERS is typically already set if either "paths" and/or
 * "paths-ignore" is set in the lgtm.yml file. If it is set, we try to honor
 * the paths-ignore filter: every line that starts with `exclude` is kept,
 * except lines containing one of the catch-all patterns listed below.
 */
const existingIndexFilters = process.env.LGTM_INDEX_FILTERS;
let excludeFilters = '';
if (existingIndexFilters) {
  console.log(`Found $LGTM_INDEX_FILTERS already set to:\n${existingIndexFilters}`);
  // Exclude patterns that are not carried over.
  const droppedExcludePatterns = [
    join('exclude:**', '*'),
    join('exclude:**', '*.*'),
  ];
  const keptExcludeLines = [];
  for (const line of existingIndexFilters.split('\n')) {
    if (!line.startsWith('exclude')) {
      continue;
    }
    if (droppedExcludePatterns.some(pattern => line.includes(pattern))) {
      continue;
    }
    keptExcludeLines.push(line);
  }
  excludeFilters = `\n${keptExcludeLines.join('\n')}`;
}

// Enable extraction of the .cds.json files only (plus the .cds files, see
// LGTM_INDEX_FILETYPES below), never anything below node_modules.
const lgtmIndexFiltersPatterns = [
  ['exclude:**', '*.*'],
  ['include:**', '*.cds.json'],
  ['include:**', '*.cds'],
  ['exclude:**', 'node_modules', '**', '*.*'],
].map(segments => join(...segments)).join('\n');
process.env.LGTM_INDEX_FILTERS = `${lgtmIndexFiltersPatterns}${excludeFilters}`;
console.log(`Set $LGTM_INDEX_FILTERS to:\n${process.env.LGTM_INDEX_FILTERS}`);
process.env.LGTM_INDEX_TYPESCRIPT = 'NONE';
// Configure to copy over the .cds files as well, by pretending they are JSON.
process.env.LGTM_INDEX_FILETYPES = '.cds:JSON';
// Ignore the LGTM_INDEX_INCLUDE variable for this purpose as it may explicitly
// refer to .js or .ts files.
delete process.env.LGTM_INDEX_INCLUDE;

console.log(
  `Extracting the .cds.json files by running the 'javascript' extractor autobuild script:\n${autobuildScriptPath}`
);
/**
 * Invoke the javascript autobuilder to index the .cds.json files only.
 *
 * Environment variables must be passed from this script's process to the
 * process that invokes the autobuild script, otherwise the CDS autobuild.sh
 * script will not be invoked by the autobuild script built into the
 * 'javascript' extractor.
 *
 * IMPORTANT: The JavaScript extractor autobuild script must be invoked with
 * the current working directory set to the project (source) root directory
 * because it assumes it is running from there. The JavaScript extractor will
 * only find the .cds files to index (to the database) if those file are
 * relative to where the autobuild script is invoked from, which should be the
 * same as the `--source-root` argument passed to the `codeql database create`
 * command.
 *
 * The script path is wrapped in double quotes because, with `shell: true`,
 * it becomes part of a shell command line and may contain spaces.
 *
 * A failure to start the script, or a non-zero exit status, is reported and
 * turned into a non-zero exit code of this process instead of being ignored.
 */
const autobuildResult = spawnSync(
  `"${autobuildScriptPath}"`,
  [],
  { cwd: sourceRoot, env: process.env, shell: true, stdio: 'inherit' }
);
if (autobuildResult.error) {
  console.error(`Failed to run the 'javascript' extractor autobuild script '${autobuildScriptPath}': ${autobuildResult.error}`);
  process.exitCode = 1;
} else if (autobuildResult.status !== 0) {
  console.error(`The 'javascript' extractor autobuild script '${autobuildScriptPath}' exited with status ${autobuildResult.status}.`);
  // status is null when the script was terminated by a signal.
  process.exitCode = typeof autobuildResult.status === 'number' ? autobuildResult.status : 1;
}
Loading
Loading