Skip to content

Commit ac22631

Browse files
committed
init soot-fact-generator
1 parent 4226a40 commit ac22631

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+8147
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
build

README.md

+54
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,56 @@
11
# soot-fact-generator
22
generate facts from bytecode (source is https://github.com/plast-lab/doop-mirror/tree/master/generators)
3+
4+
通过soot解析bytecode生成fact,类似于CodeQL创建database的过程,fact等价于codeql的database。
5+
6+
## build
7+
执行 `gradle build` 进行编译,生成的 `soot-fact-generator.jar` 位于 `build/libs` 目录
8+
9+
## usage
10+
11+
```
12+
Usage: soot-fact-generator [options] file...
13+
Options:
14+
--main <class> Specify the name of the main class.
15+
--ssa Generate SSA facts, enabling flow-sensitive analysis.
16+
--full Generate facts by full transitive resolution.
17+
--allow-phantom Allow phantom classes.
18+
-d <directory> Specify where to generate output fact files.
19+
-i <archive> Find classes in <archive>.
20+
-l <archive> Find library classes in <archive>.
21+
-ld <archive> Find dependency classes in <archive>.
22+
-lsystem Find classes in default system classes.
23+
--facts-subset <subset> Produce facts only for a subset of the given classes [APP, APP_N_DEPS, PLATFORM].
24+
--ignore-factgen-errors Continue with the analysis even if fact generation fails.
25+
--legacy-android-processing Enable legacy Android XML processing.
26+
--no-facts Don't generate facts (just empty files -- used for debugging).
27+
--ignore-wrong-staticness Ignore "wrong static-ness" errors in Soot.
28+
--lowMem Consume less memory.
29+
--failOnMissingClasses <file> Terminate if classes are missing (and record them to <file>).
30+
--also-resolve <class> Force resolution of class that may not be found automatically.
31+
--debug Enable debug mode (verbose output).
32+
--log-dir <dir> Write logs in directory <dir>.
33+
--args-file <file> Read command-line arguments from <file> (one per line).
34+
--write-artifacts-map Write artifacts map.
35+
Jimple/Shimple generation:
36+
--generate-jimple Generate Jimple/Shimple files in addition to other facts.
37+
--stdout Write Jimple/Shimple to stdout.
38+
Android options:
39+
--android-jars <archive> The main Android library JAR (for Android APK inputs). The same jar should be provided in the -l option.
40+
--decode-apk Decompress APK input in facts directory.
41+
--scan-native-code Scan native code found in JAR/APK inputs.
42+
--R-out-dir <directory> Specify where to generate R code (when linking AAR inputs).
43+
44+
Supported input archive formats: AAR, APK, JAR, ZIP
45+
```
46+
常见的用法是
47+
```
48+
java -jar soot-fact-generator.jar -i input.jar -l /usr/lib/jvm/java-8-oracle/jre/lib/rt.jar --generate-jimple --allow-phantom --full -d out
49+
```
50+
其中
51+
- `-i` 指定待分析的jar包
52+
- `-l` 指定依赖库
53+
- `--generate-jimple` 表示生成中间语言jimple
54+
- `--allow-phantom` 允许存在 phantom class,即被引用但在输入和依赖库中找不到定义的类(Soot 会为其创建占位类,而不是报错退出)
55+
- `--full` 表示对所有class进行解析
56+
- `-d` 指定输出目录

build.gradle

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
plugins {
2+
id 'application'
3+
id 'groovy'
4+
id 'java-library'
5+
id 'java'
6+
}
7+
8+
gradle.rootProject.ext.antlrVersion='4.9.1'
9+
gradle.rootProject.ext.commonsCliVersion='1.2'
10+
gradle.rootProject.ext.metadataVersion='2.4.1'
11+
gradle.rootProject.ext.groovyVersion='3.0.9'
12+
gradle.rootProject.ext.clueCommonVersion='3.25.3'
13+
gradle.rootProject.ext.log4jVersion='1.2.17'
14+
gradle.rootProject.ext.spockVersion='2.1-M2-groovy-3.0'
15+
gradle.rootProject.ext.asmVersion = '8.0.1'
16+
17+
sourceCompatibility = 1.8
18+
targetCompatibility = 1.8
19+
20+
mainClassName = 'org.clyze.doop.soot.Main'
21+
22+
repositories {
23+
mavenCentral()
24+
maven {
25+
name "soot-snapshot"
26+
url "https://soot-build.cs.uni-paderborn.de/nexus/repository/soot-snapshot"
27+
}
28+
maven {
29+
name "soot-release"
30+
url "https://soot-build.cs.uni-paderborn.de/nexus/repository/soot-release"
31+
}
32+
maven { url 'https://clyze.jfrog.io/artifactory/default-maven-local' }
33+
}
34+
35+
dependencies {
36+
//api project(path: ':generators:fact-generator-common', configuration: 'shadow')
37+
38+
// implementation "ca.mcgill.sable:soot:4.0.0"
39+
implementation "org.soot-oss:soot:4.2.1"
40+
41+
api("org.clyze:clue-common:${clueCommonVersion}") {
42+
exclude group: 'com.google.code.gson', module: 'gson'
43+
exclude group: 'org.apache.ivy', module: 'ivy'
44+
exclude group: 'org.codehaus.groovy', module: 'groovy-all'
45+
}
46+
api "org.clyze:metadata-model:${metadataVersion}"
47+
implementation "org.ow2.asm:asm-tree:${asmVersion}", // Java bytecode library
48+
"org.ow2.asm:asm-util:${asmVersion}"
49+
implementation 'org.smali:dexlib2:2.4.0' // Dexlib2: library for reading/modifying/writing Android dex files
50+
implementation 'net.dongliu:apk-parser:2.6.10' // needed for reading binary XML entries from APK inputs
51+
implementation "org.antlr:antlr4-runtime:${antlrVersion}"
52+
api 'org.clyze:native-scanner:0.6.6'
53+
api "log4j:log4j:${log4jVersion}" // Logging implementation
54+
implementation 'org.apache.commons:commons-collections4:4.1'
55+
56+
testImplementation "org.codehaus.groovy:groovy-all:${groovyVersion}" // Groovy
57+
testImplementation "org.spockframework:spock-core:${spockVersion}"
58+
}
59+
60+
//fatJar already exists
61+
task fatJar(type: Jar) {
    manifest {
        attributes 'Main-Class': mainClassName
    }
    // Bundle the *runtime* classpath: compileClasspath leaves out runtime-scoped
    // transitive dependencies, which makes the fat jar fail at run time with
    // NoClassDefFoundError.
    from { configurations.runtimeClasspath.collect { it.directory ? it : zipTree(it) } }
    with jar
    // Signature files copied from signed dependencies do not match the merged
    // archive and would make the JVM reject it ("Invalid signature file digest").
    exclude 'META-INF/*.SF', 'META-INF/*.DSA', 'META-INF/*.RSA'
    duplicatesStrategy = DuplicatesStrategy.INCLUDE
}
69+
70+
test {
71+
useJUnitPlatform()
72+
}
73+
74+
startScripts.dependsOn fatJar
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package org.clyze.doop.common;
2+
3+
import java.util.HashSet;
4+
import java.util.Set;
5+
6+
public class ArtifactEntry {
7+
// Class name inside an artifact.
8+
public final String className;
9+
// Sub artifact (such as "classes.dex" in an APK or "classes.jar" in an AAR).
10+
public final String subArtifact;
11+
// Size of entry.
12+
public final int size;
13+
14+
public ArtifactEntry(String className, String subArtifact, int size) {
15+
this.className = className;
16+
this.subArtifact = subArtifact;
17+
this.size = size;
18+
}
19+
20+
public static Set<String> toClassNames(Iterable<ArtifactEntry> s) {
21+
Set<String> ret = new HashSet<>();
22+
for (ArtifactEntry ae : s)
23+
ret.add(ae.className);
24+
return ret;
25+
}
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
package org.clyze.doop.common;
2+
3+
import java.io.File;
4+
import java.io.FileInputStream;
5+
import java.io.InputStream;
6+
import java.io.IOException;
7+
import java.nio.file.Files;
8+
import java.util.*;
9+
import java.util.concurrent.*;
10+
import java.util.function.Consumer;
11+
import java.util.zip.ZipEntry;
12+
import java.util.zip.ZipFile;
13+
import java.util.zip.ZipInputStream;
14+
15+
import org.apache.commons.io.IOUtils;
16+
import org.apache.log4j.Logger;
17+
import org.clyze.scanner.BinaryAnalysis;
18+
import org.clyze.scanner.NativeDatabaseConsumer;
19+
import org.clyze.scanner.NativeScanner;
20+
import org.clyze.utils.TypeUtils;
21+
import org.jf.dexlib2.Opcodes;
22+
import org.jf.dexlib2.dexbacked.DexBackedDexFile;
23+
import org.jf.dexlib2.iface.MultiDexContainer;
24+
import org.jf.dexlib2.dexbacked.DexBackedClassDef;
25+
import org.objectweb.asm.ClassReader;
26+
//import org.objectweb.asm.ClassVisitor;
27+
//import org.objectweb.asm.tree.ClassNode;
28+
29+
import static org.clyze.scanner.BinaryAnalysis.AnalysisType.*;
30+
import static org.jf.dexlib2.DexFileFactory.loadDexContainer;
31+
//import static org.objectweb.asm.Opcodes.*;
32+
33+
/**
34+
* This class scans input artifacts (.jar, .aar, or .apk files) and
35+
* registers each found class. Optional actions can also be performed
36+
* on the artifact entries, when they are scanned.
37+
*/
38+
public class ArtifactScanner {
39+
40+
private final Map<String, Set<ArtifactEntry>> artifactToClassMap = new ConcurrentHashMap<>();
41+
private final Logger logger = Logger.getLogger(getClass());
42+
private final Set<GenericFieldInfo> genericFields = ConcurrentHashMap.newKeySet();
43+
44+
Set<GenericFieldInfo> getGenericFields() { return genericFields; }
45+
46+
public Map<String, Set<ArtifactEntry>> getArtifactToClassMap() {
47+
return artifactToClassMap;
48+
}
49+
50+
/**
51+
* Registers a class with its container artifact.
52+
* @param artifact the file name of the artifact containing the class
53+
* @param className the name of the class
54+
* @param subArtifact the sub-artifact (such as "classes.dex" for APKs)
55+
* @param size the size of the class
56+
*/
57+
public void registerArtifactClass(String artifact, String className, String subArtifact, int size) {
58+
ArtifactEntry ae = new ArtifactEntry(className, subArtifact, size);
59+
artifactToClassMap.computeIfAbsent(artifact, x -> new CopyOnWriteArraySet<>()).add(ae);
60+
}
61+
62+
/**
63+
* Register .dex entries and perform actions over .dex entries (if
64+
* processor is not null).
65+
*
66+
* @param inputApk the path of the input APK file
67+
* @param classProc the processor for .class entries (takes entry name)
68+
*/
69+
public void processAPKClasses(String inputApk, Consumer<String> classProc) {
70+
try {
71+
Opcodes opcodes = Opcodes.getDefault();
72+
File apk = new File(inputApk);
73+
MultiDexContainer<? extends DexBackedDexFile> multiDex = loadDexContainer(apk, opcodes);
74+
for (String dexEntry : multiDex.getDexEntryNames()) {
75+
MultiDexContainer.DexEntry<?> dex = multiDex.getEntry(dexEntry);
76+
if (dex == null)
77+
logger.debug("No .dex entry for " + dexEntry);
78+
else
79+
for (DexBackedClassDef dexClass : ((DexBackedDexFile)dex.getDexFile()).getClasses()) {
80+
String className = TypeUtils.raiseTypeId(dexClass.getType());
81+
registerArtifactClass(apk.getName(), className, dexEntry, dexClass.getSize());
82+
if (classProc != null)
83+
classProc.accept(className);
84+
}
85+
}
86+
} catch (Exception ex) {
87+
logger.debug("Error while calculating artifacts on Android: " + ex.getMessage());
88+
}
89+
}
90+
91+
public void processClass(InputStream is, File f, Consumer<String> classProc) throws IOException {
92+
String className = BytecodeUtil.getClassName(new ClassReader(is));
93+
String artifact = f.getName();
94+
registerArtifactClass(artifact, className, "-", IOUtils.toByteArray(is).length);
95+
if (classProc != null)
96+
classProc.accept(className);
97+
98+
// ClassNode cn = new ClassNode(ASM5);
99+
// reader.accept(cn, ClassReader.EXPAND_FRAMES);
100+
// ClassVisitor genericSignaturesRetriever = new GenericSignaturesRetriever(ASM5);
101+
// cn.accept(genericSignaturesRetriever);
102+
// Set<GenericFieldInfo> classGenericFields = ((GenericSignaturesRetriever) genericSignaturesRetriever).getGenericFields();
103+
// this.genericFields.addAll(classGenericFields);
104+
}
105+
106+
/**
107+
* Register archive (.class) entries and perform actions over
108+
* other types of entries (if processors are not null).
109+
*
110+
* @param input the path of the input archive
111+
* @param classProc the processor for .class entries (takes entry name)
112+
* @param generalProc the general processor for all other entries
113+
*/
114+
public void processArchive(String input, Consumer<String> classProc,
115+
EntryProcessor generalProc) throws IOException {
116+
try (ZipInputStream zin = new ZipInputStream(new FileInputStream(input)); ZipFile zipFile = new ZipFile(input)) {
117+
ZipEntry entry;
118+
while ((entry = zin.getNextEntry()) != null) {
119+
/* Skip directories */
120+
if (entry.isDirectory())
121+
continue;
122+
123+
String entryName = entry.getName().toLowerCase();
124+
if (entryName.endsWith(".class")) {
125+
try {
126+
processClass(zipFile.getInputStream(entry), new File(zipFile.getName()), classProc);
127+
} catch (Exception ex) {
128+
ex.printStackTrace();
129+
System.err.println("Error while preprocessing entry \"" + entryName + "\", it will be ignored.");
130+
}
131+
} else if (generalProc != null)
132+
generalProc.accept(zipFile, entry, entryName);
133+
}
134+
}
135+
}
136+
137+
/**
138+
* Helper method to extract an entry inside a ZIP archive and save
139+
* it as a file.
140+
*
141+
* @param tmpDirName a name for the intermediate temporary directory
142+
* @param zipFile the ZIP archive
143+
* @param entry the archive entry
144+
* @param entryName the name of the entry
145+
* @return the output file
146+
*/
147+
public static File extractZipEntryAsFile(String tmpDirName, ZipFile zipFile, ZipEntry entry, String entryName) throws IOException {
148+
File tmpDir = Files.createTempDirectory(tmpDirName).toFile();
149+
tmpDir.deleteOnExit();
150+
String tmpName = entryName.replaceAll(File.separator, "_");
151+
File libTmpFile = new File(tmpDir, tmpName);
152+
libTmpFile.deleteOnExit();
153+
Files.copy(zipFile.getInputStream(entry), libTmpFile.toPath());
154+
return libTmpFile;
155+
}
156+
157+
public static void scanNativeCode(Database db, Parameters parameters,
158+
Set<String> methodStrings) {
159+
NativeDatabaseConsumer dbc = new DatabaseConnector(db);
160+
BinaryAnalysis.AnalysisType analysisType;
161+
if (parameters._nativeRadare)
162+
analysisType = RADARE;
163+
else if (parameters._nativeBuiltin)
164+
analysisType = BUILTIN;
165+
else if (parameters._nativeBinutils)
166+
analysisType = BINUTILS;
167+
else {
168+
analysisType = BUILTIN;
169+
System.out.println("No binary analysis type given, using default: " + analysisType.name());
170+
}
171+
scanNativeInputs(dbc, analysisType, parameters._preciseNativeStrings, methodStrings, parameters.getInputs());
172+
}
173+
174+
private static void scanNativeInputs(NativeDatabaseConsumer dbc,
175+
BinaryAnalysis.AnalysisType binAnalysisType,
176+
boolean preciseNativeStrings,
177+
Set<String> methodStrings,
178+
Iterable<String> inputs) {
179+
final boolean demangle = false;
180+
final boolean truncateAddresses = true;
181+
final NativeScanner scanner = new NativeScanner(true, methodStrings);
182+
183+
EntryProcessor gProc = (file, entry, entryName) -> scanner.scanArchiveEntry(dbc, binAnalysisType, preciseNativeStrings, truncateAddresses, demangle, file, entry, entryName);
184+
for (String input : inputs) {
185+
System.out.println("Processing native code in input: " + input);
186+
try {
187+
(new ArtifactScanner()).processArchive(input, null, gProc);
188+
} catch (IOException ex) {
189+
ex.printStackTrace();
190+
}
191+
}
192+
}
193+
194+
@FunctionalInterface
195+
public interface EntryProcessor {
196+
/**
197+
* Process an entry inside a .zip (JAR/AAR/APK) file.
198+
*
199+
* @param file the ZIP file
200+
* @param entry the ZIP entry
201+
* @param entryName the name of the ZIP entry
202+
*/
203+
void accept(ZipFile file, ZipEntry entry, String entryName) throws IOException;
204+
}
205+
}

0 commit comments

Comments
 (0)