Skip to content

Commit 8872f65

Browse files
committed
Add new variable type analysis algorithm
1 parent d46d9e0 commit 8872f65

File tree

5 files changed

+1225
-0
lines changed

5 files changed

+1225
-0
lines changed

src/engine/internal/llvm/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ target_sources(scratchcpp
22
PRIVATE
33
llvmbuildutils.cpp
44
llvmbuildutils.h
5+
llvmcodeanalyzer.cpp
6+
llvmcodeanalyzer.h
57
llvmcodebuilder.cpp
68
llvmcodebuilder.h
79
llvmregister.h
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
#include "llvmcodeanalyzer.h"
4+
#include "llvminstructionlist.h"
5+
#include "llvminstruction.h"
6+
#include "llvmbuildutils.h"
7+
8+
using namespace libscratchcpp;
9+
10+
static const std::unordered_set<LLVMInstruction::Type>
11+
BEGIN_LOOP_INSTRUCTIONS = { LLVMInstruction::Type::BeginRepeatLoop, LLVMInstruction::Type::BeginWhileLoop, LLVMInstruction::Type::BeginRepeatUntilLoop };
12+
13+
void LLVMCodeAnalyzer::analyzeScript(const LLVMInstructionList &script) const
14+
{
15+
std::unordered_set<LLVMInstruction *> typeAssignedInstructions;
16+
std::vector<std::unique_ptr<Branch>> branches;
17+
LLVMInstruction *ins = script.first();
18+
19+
auto topBranch = std::make_unique<Branch>();
20+
topBranch->start = ins;
21+
Branch *topBranchPtr = topBranch.get();
22+
branches.push_back(std::move(topBranch));
23+
24+
Branch *currentBranch = branches.back().get();
25+
26+
while (ins) {
27+
if (isIfStart(ins) || isLoopStart(ins)) {
28+
auto branch = std::make_unique<Branch>();
29+
branch->start = ins;
30+
branch->variableTypes = currentBranch->variableTypes;
31+
currentBranch = branch.get();
32+
branches.push_back(std::move(branch));
33+
} else if (isElse(ins)) {
34+
assert(!currentBranch->elseBranch);
35+
36+
// Enter else branch with type information from the previous branch
37+
Branch *previousBranch = branches[branches.size() - 2].get();
38+
currentBranch->elseBranch = std::make_unique<Branch>();
39+
currentBranch = currentBranch->elseBranch.get();
40+
currentBranch->start = ins;
41+
currentBranch->variableTypes = previousBranch->variableTypes;
42+
} else if (isIfEnd(ins) || isLoopEnd(ins)) {
43+
if (isLoopEnd(ins) && currentBranch->typeChanges) {
44+
// Next iteration
45+
ins = currentBranch->start;
46+
currentBranch->typeChanges = false;
47+
} else {
48+
// Merge/override types
49+
Branch *previousBranch = branches[branches.size() - 2].get();
50+
Branch *primaryBranch = branches.back().get();
51+
52+
assert(primaryBranch);
53+
54+
if (primaryBranch && primaryBranch->elseBranch) {
55+
// The previous types can be ignored in if/else statements
56+
overrideBranchTypes(primaryBranch, previousBranch);
57+
mergeBranchTypes(primaryBranch->elseBranch.get(), previousBranch);
58+
} else
59+
mergeBranchTypes(primaryBranch, previousBranch);
60+
61+
// Remove the branch
62+
branches.pop_back();
63+
currentBranch = previousBranch;
64+
}
65+
} else if (isVariableWrite(ins)) {
66+
// Type before the write
67+
updateVariableType(currentBranch, ins, typeAssignedInstructions, true);
68+
69+
// Type after the write
70+
currentBranch->variableTypes[ins->targetVariable] = writeType(ins);
71+
} else if (isVariableRead(ins)) {
72+
// Type before the read
73+
updateVariableType(currentBranch, ins, typeAssignedInstructions, false);
74+
75+
// Store the type in the return register
76+
ins->functionReturnReg->setType(ins->targetType);
77+
}
78+
79+
ins = ins->next;
80+
}
81+
82+
assert(branches.size() == 1);
83+
assert(branches.back().get() == topBranchPtr);
84+
}
85+
86+
// Updates the target type of a variable read/write instruction using the variable types
// known in the given branch.
//
// The first time an instruction is seen, it is recorded in typeAssignedInstructions and takes
// the known type of its variable (if there is one). On later visits the known type is OR-ed
// into the target type. branch->typeChanges is set for writes that are seen for the first time
// or whose target type gets extended.
void LLVMCodeAnalyzer::updateVariableType(Branch *branch, LLVMInstruction *ins, std::unordered_set<LLVMInstruction *> &typeAssignedInstructions, bool isWrite) const
{
    const auto known = branch->variableTypes.find(ins->targetVariable);
    const bool hasKnownType = (known != branch->variableTypes.cend());

    // insert() reports whether the instruction was not in the set yet
    const bool firstVisit = typeAssignedInstructions.insert(ins).second;

    if (firstVisit) {
        if (isWrite)
            branch->typeChanges = true;

        if (hasKnownType)
            ins->targetType = known->second;

        return;
    }

    // Nothing to add if the branch does not know the type of the variable
    if (!hasKnownType)
        return;

    if (isWrite && ((ins->targetType | known->second) != ins->targetType))
        branch->typeChanges = true;

    ins->targetType |= known->second;
}
112+
113+
// Adds the variable types of the given branch to the previous branch:
// types of variables the previous branch does not know are copied,
// types of known variables are OR-ed together.
void LLVMCodeAnalyzer::mergeBranchTypes(Branch *branch, Branch *previousBranch) const
{
    auto &target = previousBranch->variableTypes;

    for (const auto &[var, type] : branch->variableTypes) {
        // try_emplace() only inserts if the variable is missing
        const auto [entry, inserted] = target.try_emplace(var, type);

        if (!inserted)
            entry->second |= type;
    }
}
125+
126+
// Replaces the variable types of the previous branch with the types of the given branch
// (variables which are only known to the previous branch are left untouched).
void LLVMCodeAnalyzer::overrideBranchTypes(Branch *branch, Branch *previousBranch) const
{
    auto &target = previousBranch->variableTypes;

    for (const auto &[var, type] : branch->variableTypes)
        target.insert_or_assign(var, type);
}
132+
133+
bool LLVMCodeAnalyzer::isLoopStart(const LLVMInstruction *ins) const
134+
{
135+
return (BEGIN_LOOP_INSTRUCTIONS.find(ins->type) != BEGIN_LOOP_INSTRUCTIONS.cend());
136+
}
137+
138+
bool LLVMCodeAnalyzer::isLoopEnd(const LLVMInstruction *ins) const
139+
{
140+
return (ins->type == LLVMInstruction::Type::EndLoop);
141+
}
142+
143+
bool LLVMCodeAnalyzer::isIfStart(const LLVMInstruction *ins) const
144+
{
145+
return (ins->type == LLVMInstruction::Type::BeginIf);
146+
}
147+
148+
bool LLVMCodeAnalyzer::isElse(const LLVMInstruction *ins) const
149+
{
150+
return (ins->type == LLVMInstruction::Type::BeginElse);
151+
}
152+
153+
bool LLVMCodeAnalyzer::isIfEnd(const LLVMInstruction *ins) const
154+
{
155+
return (ins->type == LLVMInstruction::Type::EndIf);
156+
}
157+
158+
bool LLVMCodeAnalyzer::isVariableRead(const LLVMInstruction *ins) const
159+
{
160+
return (ins->type == LLVMInstruction::Type::ReadVariable);
161+
}
162+
163+
bool LLVMCodeAnalyzer::isVariableWrite(const LLVMInstruction *ins) const
164+
{
165+
return (ins->type == LLVMInstruction::Type::WriteVariable);
166+
}
167+
168+
// Returns the type of the value written by the given write instruction,
// as reported by LLVMBuildUtils::optimizeRegisterType() for the value register.
// If the value comes from a variable read, the type stored in the return register
// of that read is first copied to the value argument.
Compiler::StaticType LLVMCodeAnalyzer::writeType(LLVMInstruction *ins) const
{
    assert(ins);
    assert(!ins->args.empty());

    // The value is always the last argument in variable/list write instructions
    auto &valueArg = ins->args.back();
    const LLVMRegister *valueReg = valueArg.second;

    // TODO: Handle list item
    if (valueReg->instruction && isVariableRead(valueReg->instruction.get())) {
        // Store the variable type in the value argument
        valueArg.first = valueReg->instruction->functionReturnReg->type();
    }

    return LLVMBuildUtils::optimizeRegisterType(valueReg);
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
#pragma once
4+
5+
#include <scratchcpp/compiler.h>
#include <memory>
#include <unordered_map>
#include <unordered_set>
7+
8+
namespace libscratchcpp
9+
{
10+
11+
class LLVMInstructionList;
12+
class LLVMInstruction;
13+
14+
class LLVMCodeAnalyzer
15+
{
16+
public:
17+
void analyzeScript(const LLVMInstructionList &script) const;
18+
19+
private:
20+
struct Branch
21+
{
22+
LLVMInstruction *start = nullptr;
23+
bool typeChanges = false;
24+
std::unordered_map<Variable *, Compiler::StaticType> variableTypes;
25+
26+
std::unique_ptr<Branch> elseBranch;
27+
};
28+
29+
void updateVariableType(Branch *branch, LLVMInstruction *ins, std::unordered_set<LLVMInstruction *> &typeAssignedInstructions, bool isWrite) const;
30+
void mergeBranchTypes(Branch *branch, Branch *previousBranch) const;
31+
void overrideBranchTypes(Branch *branch, Branch *previousBranch) const;
32+
33+
bool isLoopStart(const LLVMInstruction *ins) const;
34+
bool isLoopEnd(const LLVMInstruction *ins) const;
35+
bool isIfStart(const LLVMInstruction *ins) const;
36+
bool isElse(const LLVMInstruction *ins) const;
37+
bool isIfEnd(const LLVMInstruction *ins) const;
38+
39+
bool isVariableRead(const LLVMInstruction *ins) const;
40+
bool isVariableWrite(const LLVMInstruction *ins) const;
41+
42+
Compiler::StaticType writeType(LLVMInstruction *ins) const;
43+
};
44+
45+
} // namespace libscratchcpp

test/llvm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ add_executable(
2222
llvmexecutablecode_test.cpp
2323
llvmcodebuilder_test.cpp
2424
llvminstructionlist_test.cpp
25+
code_analyzer/variable_type_analysis.cpp
2526
operators/equal_comparison_test.cpp
2627
operators/greater_than_test.cpp
2728
operators/less_than_test.cpp

0 commit comments

Comments
 (0)