Skip to content

Commit df3dc96

Browse files
committed
Swift: fix missing extractions from Builtin
There were missing extractions from the Builtin (and other) modules. This was actually caused by two issues: * we did not visit all required modules, as for example the `Builtin` module does not appear as being imported by anybody (together with another mysterious `__Objc` module) * moreover the `Builtin` module works internally by only creating declarations on demand, and does not provide a list of its top level declarations. The first problem was solved by moving module collection to the actual visiting. This may mean we extract less modules, as we only extract the modules we actually use something from (recursively). This change can be reverted if we feel we need it. The second one was solved by explicitly listing the builtin symbols encountered during a normal extraction. This does mean this list needs to be kept up to date.
1 parent 606b9e6 commit df3dc96

File tree

6 files changed

+69
-48
lines changed

6 files changed

+69
-48
lines changed

swift/extractor/SwiftExtractor.cpp

Lines changed: 52 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <queue>
77

88
#include <swift/AST/SourceFile.h>
9+
#include <swift/AST/Builtins.h>
910
#include <swift/Basic/FileTypes.h>
1011
#include <llvm/ADT/SmallString.h>
1112
#include <llvm/Support/FileSystem.h>
@@ -68,22 +69,52 @@ static std::string getFilename(swift::ModuleDecl& module, swift::SourceFile* pri
6869
return module.getModuleFilename().str();
6970
}
7071

72+
static llvm::SmallVector<swift::ValueDecl*> getBuiltinDecls(swift::ModuleDecl& builtinModule) {
73+
llvm::SmallVector<swift::ValueDecl*> values;
74+
for (auto symbol : {
75+
"zeroInitializer", "BridgeObject", "Word", "NativeObject",
76+
"RawPointer", "Int1", "Int8", "Int16",
77+
"Int32", "Int64", "IntLiteral", "FPIEEE16",
78+
"FPIEEE32", "FPIEEE64", "FPIEEE80", "Vec2xInt8",
79+
"Vec4xInt8", "Vec8xInt8", "Vec16xInt8", "Vec32xInt8",
80+
"Vec64xInt8", "Vec2xInt16", "Vec4xInt16", "Vec8xInt16",
81+
"Vec16xInt16", "Vec32xInt16", "Vec64xInt16", "Vec2xInt32",
82+
"Vec4xInt32", "Vec8xInt32", "Vec16xInt32", "Vec32xInt32",
83+
"Vec64xInt32", "Vec2xInt64", "Vec4xInt64", "Vec8xInt64",
84+
"Vec16xInt64", "Vec32xInt64", "Vec64xInt64", "Vec2xFPIEEE16",
85+
"Vec4xFPIEEE16", "Vec8xFPIEEE16", "Vec16xFPIEEE16", "Vec32xFPIEEE16",
86+
"Vec64xFPIEEE16", "Vec2xFPIEEE32", "Vec4xFPIEEE32", "Vec8xFPIEEE32",
87+
"Vec16xFPIEEE32", "Vec32xFPIEEE32", "Vec64xFPIEEE32", "Vec2xFPIEEE64",
88+
"Vec4xFPIEEE64", "Vec8xFPIEEE64", "Vec16xFPIEEE64", "Vec32xFPIEEE64",
89+
"Vec64xFPIEEE64",
90+
}) {
91+
builtinModule.lookupValue(builtinModule.getASTContext().getIdentifier(symbol),
92+
swift::NLKind::QualifiedLookup, values);
93+
}
94+
return values;
95+
}
96+
7197
static llvm::SmallVector<swift::Decl*> getTopLevelDecls(swift::ModuleDecl& module,
7298
swift::SourceFile* primaryFile = nullptr) {
7399
llvm::SmallVector<swift::Decl*> ret;
74100
ret.push_back(&module);
75101
if (primaryFile) {
76102
primaryFile->getTopLevelDecls(ret);
103+
} else if (module.isBuiltinModule()) {
104+
for (auto d : getBuiltinDecls(module)) {
105+
ret.push_back(d);
106+
}
77107
} else {
78108
module.getTopLevelDecls(ret);
79109
}
80110
return ret;
81111
}
82112

83-
static void extractDeclarations(const SwiftExtractorConfiguration& config,
84-
swift::CompilerInstance& compiler,
85-
swift::ModuleDecl& module,
86-
swift::SourceFile* primaryFile = nullptr) {
113+
static std::unordered_set<swift::ModuleDecl*> extractDeclarations(
114+
const SwiftExtractorConfiguration& config,
115+
swift::CompilerInstance& compiler,
116+
swift::ModuleDecl& module,
117+
swift::SourceFile* primaryFile = nullptr) {
87118
auto filename = getFilename(module, primaryFile);
88119

89120
// The extractor can be called several times from different processes with
@@ -92,7 +123,7 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config,
92123
auto trapTarget = createTargetTrapFile(config, filename);
93124
if (!trapTarget) {
94125
// another process arrived first, nothing to do for us
95-
return;
126+
return {};
96127
}
97128
TrapDomain trap{*trapTarget};
98129

@@ -116,6 +147,7 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config,
116147
for (auto& comment : comments) {
117148
visitor.extract(comment);
118149
}
150+
return std::move(visitor).getEncounteredModules();
119151
}
120152

121153
static std::unordered_set<std::string> collectInputFilenames(swift::CompilerInstance& compiler) {
@@ -132,40 +164,18 @@ static std::unordered_set<std::string> collectInputFilenames(swift::CompilerInst
132164
return sourceFiles;
133165
}
134166

135-
static std::unordered_set<swift::ModuleDecl*> collectModules(swift::CompilerInstance& compiler) {
136-
// getASTContext().getLoadedModules() does not provide all the modules available within the
137-
// program.
138-
// We need to iterate over all the imported modules (recursively) to see the whole "universe."
139-
std::unordered_set<swift::ModuleDecl*> allModules;
140-
std::queue<swift::ModuleDecl*> worklist;
141-
for (auto& [_, module] : compiler.getASTContext().getLoadedModules()) {
142-
worklist.push(module);
143-
allModules.insert(module);
144-
}
145-
146-
while (!worklist.empty()) {
147-
auto module = worklist.front();
148-
worklist.pop();
149-
llvm::SmallVector<swift::ImportedModule> importedModules;
150-
// TODO: we may need more than just Exported ones
151-
module->getImportedModules(importedModules, swift::ModuleDecl::ImportFilterKind::Exported);
152-
for (auto& imported : importedModules) {
153-
if (allModules.count(imported.importedModule) == 0) {
154-
worklist.push(imported.importedModule);
155-
allModules.insert(imported.importedModule);
156-
}
157-
}
158-
}
159-
return allModules;
160-
}
161-
162167
void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
163168
swift::CompilerInstance& compiler) {
164169
auto inputFiles = collectInputFilenames(compiler);
165-
auto modules = collectModules(compiler);
170+
std::vector<swift::ModuleDecl*> todo = {compiler.getMainModule()};
171+
std::unordered_set<swift::ModuleDecl*> processed = {};
166172

167-
for (auto& module : modules) {
173+
while (!todo.empty()) {
174+
auto module = todo.back();
175+
todo.pop_back();
176+
llvm::errs() << "processing module " << module->getName() << '\n';
168177
bool isFromSourceFile = false;
178+
std::unordered_set<swift::ModuleDecl*> encounteredModules;
169179
for (auto file : module->getFiles()) {
170180
auto sourceFile = llvm::dyn_cast<swift::SourceFile>(file);
171181
if (!sourceFile) {
@@ -176,10 +186,16 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
176186
continue;
177187
}
178188
archiveFile(config, *sourceFile);
179-
extractDeclarations(config, compiler, *module, sourceFile);
189+
encounteredModules = extractDeclarations(config, compiler, *module, sourceFile);
180190
}
181191
if (!isFromSourceFile) {
182-
extractDeclarations(config, compiler, *module);
192+
encounteredModules = extractDeclarations(config, compiler, *module);
193+
}
194+
processed.insert(module);
195+
for (auto encountered : encounteredModules) {
196+
if (processed.count(encountered) == 0) {
197+
todo.push_back(encountered);
198+
}
183199
}
184200
}
185201
}

swift/extractor/infra/SwiftDispatcher.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ class SwiftDispatcher {
5555
}
5656
}
5757

58+
const std::unordered_set<swift::ModuleDecl*> getEncounteredModules() && {
59+
return std::move(encounteredModules);
60+
}
61+
5862
template <typename Entry>
5963
void emit(const Entry& entry) {
6064
trap.emit(entry);
@@ -228,8 +232,16 @@ class SwiftDispatcher {
228232
// - extracting a primary source file: in this mode, we extract several files belonging to the
229233
// same module one by one. In this mode, we restrict emission only to the same file ignoring
230234
// all the other files.
235+
// This is also used to register the modules we encounter.
236+
// TODO calls to this function should be taken away from `DeclVisitor` and moved around with a
237+
// clearer separation between naming entities (some decls, all types), deciding whether to emit
238+
// them and finally visiting emitting the contents of the entity (which should remain in the
239+
// visitors). Then this double responsibility (carrying out the test and registering encountered
240+
// modules) should also be cleared out
231241
bool shouldEmitDeclBody(const swift::Decl& decl) {
232-
if (decl.getModuleContext() != &currentModule) {
242+
auto module = decl.getModuleContext();
243+
if (module != &currentModule) {
244+
encounteredModules.insert(module);
233245
return false;
234246
}
235247
// ModuleDecl is a special case: if it passed the previous test, it is the current module
@@ -333,6 +345,7 @@ class SwiftDispatcher {
333345
Store::Handle waitingForNewLabel{std::monostate{}};
334346
swift::ModuleDecl& currentModule;
335347
swift::SourceFile* currentPrimarySourceFile;
348+
std::unordered_set<swift::ModuleDecl*> encounteredModules;
336349
};
337350

338351
} // namespace codeql

swift/extractor/visitors/SwiftVisitor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ namespace codeql {
1111

1212
class SwiftVisitor : private SwiftDispatcher {
1313
public:
14+
using SwiftDispatcher::getEncounteredModules;
1415
using SwiftDispatcher::SwiftDispatcher;
1516

1617
template <typename T>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
| file://:0:0:0:0 | A |
22
| file://:0:0:0:0 | B |
33
| file://:0:0:0:0 | PackageDescription |
4+
| file://:0:0:0:0 | __ObjC |
45
| file://:0:0:0:0 | main |
56
| file://:0:0:0:0 | partial_modules |
Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
| Builtin.BridgeObject | getName: | Builtin.BridgeObject | getCanonicalType: | Builtin.BridgeObject |
2-
| Builtin.DefaultActorStorage | getName: | Builtin.DefaultActorStorage | getCanonicalType: | Builtin.DefaultActorStorage |
3-
| Builtin.Executor | getName: | Builtin.Executor | getCanonicalType: | Builtin.Executor |
42
| Builtin.FPIEEE32 | getName: | Builtin.FPIEEE32 | getCanonicalType: | Builtin.FPIEEE32 |
53
| Builtin.FPIEEE64 | getName: | Builtin.FPIEEE64 | getCanonicalType: | Builtin.FPIEEE64 |
64
| Builtin.IntLiteral | getName: | Builtin.IntLiteral | getCanonicalType: | Builtin.IntLiteral |
7-
| Builtin.Job | getName: | Builtin.Job | getCanonicalType: | Builtin.Job |
85
| Builtin.NativeObject | getName: | Builtin.NativeObject | getCanonicalType: | Builtin.NativeObject |
96
| Builtin.RawPointer | getName: | Builtin.RawPointer | getCanonicalType: | Builtin.RawPointer |
10-
| Builtin.RawUnsafeContinuation | getName: | Builtin.RawUnsafeContinuation | getCanonicalType: | Builtin.RawUnsafeContinuation |
11-
| Builtin.UnsafeValueBuffer | getName: | Builtin.UnsafeValueBuffer | getCanonicalType: | Builtin.UnsafeValueBuffer |

swift/ql/test/extractor-tests/generated/type/BuiltinType/builtin_types.swift

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@ func foo(
44
_: Builtin.FPIEEE32,
55
_: Builtin.FPIEEE64,
66
_: Builtin.BridgeObject,
7-
_: Builtin.DefaultActorStorage,
8-
_: Builtin.Executor,
9-
_: Builtin.Job,
107
_: Builtin.NativeObject,
11-
_: Builtin.RawPointer,
12-
_: Builtin.RawUnsafeContinuation,
13-
_: Builtin.UnsafeValueBuffer
8+
_: Builtin.RawPointer
149
) {}

0 commit comments

Comments
 (0)