Skip to content

Commit 5a662f1

Browse files
isoosjonasfj
andauthoredJan 6, 2023
Batch analyze licenses + compare changes. (#1184)
* Batch analyze licenses + compare changes. * Update third_party/pub/README.md Co-authored-by: Jonas Finnemann Jensen <jopsen@gmail.com> * Update third_party/pub/download_pub_licenses.sh Co-authored-by: Jonas Finnemann Jensen <jopsen@gmail.com> * Updated based on review comments. * Explicit output file name. * Update .gitignore Co-authored-by: Jonas Finnemann Jensen <jopsen@gmail.com> Co-authored-by: Jonas Finnemann Jensen <jopsen@gmail.com>
1 parent c95e64c commit 5a662f1

File tree

4 files changed

+143
-0
lines changed

4 files changed

+143
-0
lines changed
 

‎tool/license_detection/README.md

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Evaluate license detection changes
2+
3+
The tools in this directory help to evaluate changes in the license-detection code,
4+
by downloading licenses from `pub.dev` and comparing the license
5+
detection output before and after the changes.
6+
7+
The cached licenses are stored in `.dart_tool/pana/license-cache/`.
8+
9+
- Use `download_pub_dev_licenses.sh` to populate the directory with fresh licenses from `pub.dev`.
10+
- Run `batch_analyse_licenses.dart` before and after a license detection change.
11+
- Run `compare_analysis.dart` with the "before" and "after" output files produced in the previous step.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:convert';
6+
import 'dart:io';
7+
8+
import 'package:pana/src/license.dart';
9+
10+
/// Runs license detection over every file in the license cache directory and
/// writes an aggregated JSON summary to the file named by the single argument.
///
/// The summary maps each package name (derived from the cached file name
/// `LICENSE-<package>.txt`) to the sorted list of detected SPDX identifiers.
/// Prints the usage text and returns when the argument count is not exactly
/// one or when `--help` is passed.
Future<void> main(List<String> args) async {
  // Exactly one argument is required; anything else gets the usage text
  // instead of an uncaught StateError from `args.single`.
  if (args.length != 1 || args.contains('--help')) {
    print('dart batch_analyse_licenses.dart <output.json>');
    print('');
    print(
        'Read all license files from the license cache directory and create an '
        'aggregated summary of the results.');
    return;
  }
  final outputFileName = args.single;

  // Sort by path so the generated summary has a deterministic key order.
  final files = Directory('.dart_tool/pana/license-cache')
      .listSync()
      .whereType<File>()
      .toList()
    ..sort((a, b) => a.path.compareTo(b.path));

  final result = <String, dynamic>{};
  for (final file in files) {
    // `LICENSE-<package>.txt` -> `<package>`.
    final packageName =
        file.path.split('/').last.split('LICENSE-').last.split('.txt').first;
    try {
      final content = file.readAsStringSync();
      final list =
          await detectLicenseInContent(content, relativePath: 'LICENSE');
      final spdxIds = list.map((e) => e.spdxIdentifier).toList()..sort();
      result[packageName] = {
        'spdxIds': spdxIds,
      };
    } catch (e) {
      // Best-effort batch run: keep going, but make the failure visible
      // instead of silently dropping the package from the summary.
      stderr.writeln('Failed to analyse ${file.path}: $e');
    }
  }
  await File(outputFileName)
      .writeAsString(const JsonEncoder.withIndent(' ').convert(result));
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:convert';
6+
import 'dart:io';
7+
8+
/// Compares two license analysis summaries and prints the detected changes.
///
/// Expects two arguments: the "before" and "after" JSON files. Only packages
/// present in both files are compared. Each distinct change (`-<id>` for an
/// identifier only in the first file, `+<id>` for one only in the second) is
/// printed with the number of affected packages and up to five example
/// package names, most frequent change first.
Future<void> main(List<String> args) async {
  // Both input files are required; guarding on the count avoids a RangeError
  // on `args[1]` when only one path is supplied.
  if (args.length < 2 || args.contains('--help')) {
    print('dart batch_compare_analysis.dart <before.json> <after.json>');
    print('');
    print(
        'Compare analysis results and highlight changes using the output of the '
        '`batch_analyse_licenses.dart` script.');
    return;
  }
  final before = await _read(args[0]);
  final after = await _read(args[1]);

  // Maps a change label to the packages that exhibit it.
  final changes = <String, List<String>>{};
  for (final entry in before.entries) {
    final afterResult = after[entry.key];
    // Skip packages that are missing from the second file.
    if (afterResult == null) continue;
    for (final change in entry.value.diff(afterResult)) {
      changes.putIfAbsent(change, () => []).add(entry.key);
    }
  }

  // Most widespread changes first.
  final entries = changes.entries.toList()
    ..sort((a, b) => -a.value.length.compareTo(b.value.length));
  for (final entry in entries) {
    print(
        '${entry.value.length.toString().padLeft(6)} ${entry.key.padLeft(30)}: ${entry.value.take(5).join(', ')}');
  }
}
35+
36+
/// The license detection outcome recorded for a single entry of an analysis
/// summary file.
class LicenseResult {
  /// The SPDX identifiers stored under the `spdxIds` key.
  final List<String> spdxIds;

  LicenseResult({required this.spdxIds});

  /// Builds a result from its JSON map form.
  ///
  /// A missing or `null` `spdxIds` entry yields an empty identifier list.
  factory LicenseResult.fromJson(Map<String, dynamic> input) {
    final rawIds = input['spdxIds'] as List?;
    if (rawIds == null) {
      return LicenseResult(spdxIds: const <String>[]);
    }
    return LicenseResult(spdxIds: rawIds.cast<String>());
  }

  /// Lists the differences between this result and [other].
  ///
  /// Identifiers found only in this result come first, prefixed with `-`;
  /// identifiers found only in [other] follow, prefixed with `+`.
  List<String> diff(LicenseResult other) {
    final differences = <String>[];
    for (final id in spdxIds) {
      if (!other.spdxIds.contains(id)) {
        differences.add('-$id');
      }
    }
    for (final id in other.spdxIds) {
      if (!spdxIds.contains(id)) {
        differences.add('+$id');
      }
    }
    return differences;
  }
}
53+
54+
/// Reads the JSON file at [inputFilePath] and converts every top-level entry
/// into a [LicenseResult], keeping the original keys.
Future<Map<String, LicenseResult>> _read(String inputFilePath) async {
  final raw = await File(inputFilePath).readAsString();
  final decoded = json.decode(raw) as Map<String, dynamic>;
  return <String, LicenseResult>{
    for (final entry in decoded.entries)
      entry.key: LicenseResult.fromJson(entry.value as Map<String, dynamic>),
  };
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/usr/bin/env bash

# Downloads the LICENSE file from the latest version of every package listed
# by pub.dev into the license cache directory, one `LICENSE-<package>.txt`
# file per package. Uses `curl`, `gzip`, `jq`, `tar` and `parallel`.

set -e

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quoted so that a checkout path containing spaces does not break `cd`.
PROJECT_DIR="$( cd "${SCRIPT_DIR}/../.." && pwd )"
LICENSES_DIR="${PROJECT_DIR}/.dart_tool/pana/license-cache"

mkdir -p "${LICENSES_DIR}"
cd "${LICENSES_DIR}"

# Get all package names
# The jq filter is quoted so the shell never treats `[]` as a glob pattern.
get_all_package_names() { curl -s -H 'Accept-Encoding: gzip' 'https://pub.dev/api/package-names' | gzip -d | jq -r '.packages[]'; }

# Given a package name, get archive URL for latest version
get_archive_url() { curl -sL "https://pub.dev/api/packages/$1" | jq -r '.latest.archive_url'; }

# Given a package name, get LICENSE file from latest version
# The URL substitution is quoted so it is always passed to curl as one argument.
get_license() { curl -sL "$(get_archive_url "$1")" | tar -xzO --ignore-case LICENSE 2> /dev/null; }

# Given a package name, download license to LICENSE-<package>.txt
download_license() { get_license "$1" > "LICENSE-$1.txt"; }

# Export the functions so the shells started by `parallel` can call them.
export -f get_all_package_names
export -f get_archive_url
export -f get_license
export -f download_license

# Run up to 20 downloads at a time.
get_all_package_names | parallel -j 20 download_license

0 commit comments

Comments
 (0)
Please sign in to comment.