Skip to content

Commit 369394f

Browse files
committed
LLVMCodeBuilder: Switch to new string API
1 parent 00c9bee commit 369394f

File tree

6 files changed

+137
-87
lines changed

6 files changed

+137
-87
lines changed

src/engine/internal/llvm/llvmcodebuilder.cpp

Lines changed: 75 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ std::shared_ptr<ExecutableCode> LLVMCodeBuilder::finalize()
9797

9898
std::vector<LLVMIfStatement> ifStatements;
9999
std::vector<LLVMLoop> loops;
100-
m_heap.clear();
100+
m_stringHeap.clear();
101101

102102
// Create variable pointers
103103
for (auto &[var, varPtr] : m_variablePtrs) {
@@ -107,7 +107,7 @@ std::shared_ptr<ExecutableCode> LLVMCodeBuilder::finalize()
107107
varPtr.heapPtr = ptr;
108108

109109
// All variables are currently created on the stack and synced later (seems to be faster)
110-
// NOTE: Strings are NOT copied, only the pointer and string size are copied
110+
// NOTE: Strings are NOT copied, only the pointer is copied
111111
varPtr.stackPtr = m_builder.CreateAlloca(m_valueDataType);
112112

113113
// If there are no write operations outside loops, initialize the stack variable now
@@ -177,7 +177,7 @@ std::shared_ptr<ExecutableCode> LLVMCodeBuilder::finalize()
177177
step.functionReturnReg->value = ret;
178178

179179
if (step.functionReturnReg->type() == Compiler::StaticType::String)
180-
freeLater(step.functionReturnReg->value);
180+
freeStringLater(step.functionReturnReg->value);
181181
}
182182

183183
break;
@@ -863,7 +863,7 @@ std::shared_ptr<ExecutableCode> LLVMCodeBuilder::finalize()
863863
assert(step.args.size() == 0);
864864
const LLVMListPtr &listPtr = m_listPtrs[step.workList];
865865
llvm::Value *ptr = m_builder.CreateCall(resolve_list_to_string(), listPtr.ptr);
866-
freeLater(ptr);
866+
freeStringLater(ptr);
867867
step.functionReturnReg->value = ptr;
868868
break;
869869
}
@@ -1215,7 +1215,7 @@ std::shared_ptr<ExecutableCode> LLVMCodeBuilder::finalize()
12151215
m_builder.CreateBr(endBranch);
12161216

12171217
m_builder.SetInsertPoint(endBranch);
1218-
assert(m_heap.size() == 1);
1218+
assert(m_stringHeap.size() == 1);
12191219
freeScopeHeap();
12201220
syncVariables(targetVariables);
12211221

@@ -1764,6 +1764,7 @@ void LLVMCodeBuilder::initTypes()
17641764
{
17651765
llvm::PointerType *pointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0);
17661766
m_valueDataType = LLVMTypes::createValueDataType(&m_builder);
1767+
m_stringPtrType = LLVMTypes::createStringPtrType(&m_builder);
17671768
m_resumeFuncType = llvm::FunctionType::get(m_builder.getInt1Ty(), pointerType, false);
17681769
}
17691770

@@ -1841,7 +1842,7 @@ void LLVMCodeBuilder::pushScopeLevel()
18411842
} else
18421843
m_scopeLists.push_back(m_scopeLists.back());
18431844

1844-
m_heap.push_back({});
1845+
m_stringHeap.push_back({});
18451846
}
18461847

18471848
void LLVMCodeBuilder::popScopeLevel()
@@ -1865,7 +1866,7 @@ void LLVMCodeBuilder::popScopeLevel()
18651866
m_scopeLists.pop_back();
18661867

18671868
freeScopeHeap();
1868-
m_heap.pop_back();
1869+
m_stringHeap.pop_back();
18691870
}
18701871

18711872
void LLVMCodeBuilder::pushLoopScope(bool buildPhase)
@@ -1964,26 +1965,26 @@ llvm::Value *LLVMCodeBuilder::addAlloca(llvm::Type *type)
19641965
return ret;
19651966
}
19661967

1967-
void LLVMCodeBuilder::freeLater(llvm::Value *value)
1968+
void LLVMCodeBuilder::freeStringLater(llvm::Value *value)
19681969
{
1969-
assert(!m_heap.empty());
1970+
assert(!m_stringHeap.empty());
19701971

1971-
if (m_heap.empty())
1972+
if (m_stringHeap.empty())
19721973
return;
19731974

1974-
m_heap.back().push_back(value);
1975+
m_stringHeap.back().push_back(value);
19751976
}
19761977

19771978
void LLVMCodeBuilder::freeScopeHeap()
19781979
{
1979-
if (m_heap.empty())
1980+
if (m_stringHeap.empty())
19801981
return;
19811982

1982-
// Free dynamically allocated memory in current scope
1983-
auto &heap = m_heap.back();
1983+
// Free strings in current scope
1984+
auto &heap = m_stringHeap.back();
19841985

19851986
for (llvm::Value *ptr : heap)
1986-
m_builder.CreateFree(ptr);
1987+
m_builder.CreateCall(resolve_string_pool_free(), { ptr });
19871988

19881989
heap.clear();
19891990
}
@@ -2056,15 +2057,16 @@ llvm::Value *LLVMCodeBuilder::castValue(LLVMRegister *reg, Compiler::StaticType
20562057
case Compiler::StaticType::Bool:
20572058
case Compiler::StaticType::Unknown: {
20582059
// Cast to string
2059-
llvm::Value *ptr = m_builder.CreateCall(resolve_value_toCString(), reg->value);
2060-
freeLater(ptr);
2060+
// TODO: Use value_stringToDouble() and value_stringToBool()
2061+
llvm::Value *ptr = m_builder.CreateCall(resolve_value_toStringPtr(), reg->value);
2062+
freeStringLater(ptr);
20612063
return ptr;
20622064
}
20632065

20642066
case Compiler::StaticType::String: {
20652067
// Read string pointer directly
20662068
llvm::Value *ptr = m_builder.CreateStructGEP(m_valueDataType, reg->value, 0);
2067-
return m_builder.CreateLoad(llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0), ptr);
2069+
return m_builder.CreateLoad(m_stringPtrType->getPointerTo(), ptr);
20682070
}
20692071

20702072
default:
@@ -2119,14 +2121,14 @@ llvm::Value *LLVMCodeBuilder::castRawValue(LLVMRegister *reg, Compiler::StaticTy
21192121
switch (reg->type()) {
21202122
case Compiler::StaticType::Number: {
21212123
// Convert double to string
2122-
llvm::Value *ptr = m_builder.CreateCall(resolve_value_doubleToCString(), reg->value);
2123-
freeLater(ptr);
2124+
llvm::Value *ptr = m_builder.CreateCall(resolve_value_doubleToStringPtr(), reg->value);
2125+
freeStringLater(ptr);
21242126
return ptr;
21252127
}
21262128

21272129
case Compiler::StaticType::Bool: {
21282130
// Convert bool to string
2129-
llvm::Value *ptr = m_builder.CreateCall(resolve_value_boolToCString(), reg->value);
2131+
llvm::Value *ptr = m_builder.CreateCall(resolve_value_boolToStringPtr(), reg->value);
21302132
// NOTE: Do not deallocate later
21312133
return ptr;
21322134
}
@@ -2153,8 +2155,23 @@ llvm::Constant *LLVMCodeBuilder::castConstValue(const Value &value, Compiler::St
21532155
case Compiler::StaticType::Bool:
21542156
return m_builder.getInt1(value.toBool());
21552157

2156-
case Compiler::StaticType::String:
2157-
return m_builder.CreateGlobalStringPtr(value.toString());
2158+
case Compiler::StaticType::String: {
2159+
std::u16string str = value.toUtf16();
2160+
2161+
// Create a constant array for the string
2162+
std::vector<llvm::Constant *> elements;
2163+
for (char16_t ch : str)
2164+
elements.push_back(m_builder.getInt16(ch));
2165+
2166+
elements.push_back(m_builder.getInt16(0)); // null terminator
2167+
2168+
llvm::ArrayType *arrayType = llvm::ArrayType::get(m_builder.getInt16Ty(), elements.size());
2169+
llvm::Constant *constArray = llvm::ConstantArray::get(arrayType, elements);
2170+
2171+
llvm::Constant *globalStr = new llvm::GlobalVariable(*m_module, arrayType, true, llvm::GlobalValue::PrivateLinkage, constArray, "string");
2172+
llvm::Constant *stringStruct = llvm::ConstantStruct::get(m_stringPtrType, { globalStr, m_builder.getInt64(str.size()), m_builder.getInt64(str.size() + 1) });
2173+
return new llvm::GlobalVariable(*m_module, m_stringPtrType, true, llvm::GlobalValue::PrivateLinkage, stringStruct, "stringPtr");
2174+
}
21582175

21592176
default:
21602177
assert(false);
@@ -2186,7 +2203,7 @@ llvm::Type *LLVMCodeBuilder::getType(Compiler::StaticType type)
21862203
return m_builder.getInt1Ty();
21872204

21882205
case Compiler::StaticType::String:
2189-
return llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0);
2206+
return m_stringPtrType->getPointerTo();
21902207

21912208
default:
21922209
assert(false);
@@ -2579,7 +2596,7 @@ void LLVMCodeBuilder::createValueStore(LLVMRegister *reg, llvm::Value *targetPtr
25792596
break;
25802597

25812598
case Compiler::StaticType::String:
2582-
m_builder.CreateCall(resolve_value_assign_cstring(), { targetPtr, converted });
2599+
m_builder.CreateCall(resolve_value_assign_stringPtr(), { targetPtr, converted });
25832600
break;
25842601

25852602
case Compiler::StaticType::Unknown:
@@ -2613,7 +2630,6 @@ void LLVMCodeBuilder::createValueCopy(llvm::Value *source, llvm::Value *target)
26132630
copyStructField(source, target, 0, m_valueDataType, m_builder.getInt64Ty()); // value
26142631
copyStructField(source, target, 1, m_valueDataType, m_builder.getInt32Ty()); // type
26152632
/* 2: padding */
2616-
copyStructField(source, target, 3, m_valueDataType, m_builder.getInt64Ty()); // string size
26172633
}
26182634

26192635
void LLVMCodeBuilder::copyStructField(llvm::Value *source, llvm::Value *target, int index, llvm::StructType *structType, llvm::Type *fieldType)
@@ -2707,7 +2723,7 @@ llvm::Value *LLVMCodeBuilder::createValue(LLVMRegister *reg)
27072723

27082724
llvm::Constant *type = m_builder.getInt32(static_cast<uint32_t>(reg->constValue().type()));
27092725
llvm::Constant *padding = m_builder.getInt32(0);
2710-
llvm::Constant *constValue = llvm::ConstantStruct::get(m_valueDataType, { value, type, padding, m_builder.getInt64(0) });
2726+
llvm::Constant *constValue = llvm::ConstantStruct::get(m_valueDataType, { value, type, padding });
27112727
m_builder.CreateStore(constValue, ret);
27122728

27132729
return ret;
@@ -2896,7 +2912,7 @@ llvm::Value *LLVMCodeBuilder::createComparison(LLVMRegister *arg1, LLVMRegister
28962912

28972913
case Compiler::StaticType::String: {
28982914
// Compare two strings
2899-
llvm::Value *cmpRet = m_builder.CreateCall(resolve_strcasecmp(), { value1, value2 });
2915+
llvm::Value *cmpRet = m_builder.CreateCall(resolve_string_compare_case_insensitive(), { value1, value2 });
29002916

29012917
switch (type) {
29022918
case Comparison::EQ:
@@ -2934,6 +2950,7 @@ llvm::Value *LLVMCodeBuilder::createStringComparison(LLVMRegister *arg1, LLVMReg
29342950
if (caseSensitive)
29352951
result = arg1->constValue().toString() == arg2->constValue().toString();
29362952
else {
2953+
// TODO: Use a custom comparison function
29372954
std::string str1 = arg1->constValue().toString();
29382955
std::string str2 = arg2->constValue().toString();
29392956
result = strcasecmp(str1.c_str(), str2.c_str()) == 0;
@@ -2950,7 +2967,7 @@ llvm::Value *LLVMCodeBuilder::createStringComparison(LLVMRegister *arg1, LLVMReg
29502967
// Explicitly cast to string
29512968
llvm::Value *string1 = castValue(arg1, Compiler::StaticType::String);
29522969
llvm::Value *string2 = castValue(arg2, Compiler::StaticType::String);
2953-
llvm::Value *cmp = m_builder.CreateCall(caseSensitive ? resolve_strcmp() : resolve_strcasecmp(), { string1, string2 });
2970+
llvm::Value *cmp = m_builder.CreateCall(caseSensitive ? resolve_string_compare_case_sensitive() : resolve_string_compare_case_insensitive(), { string1, string2 });
29542971
return m_builder.CreateICmpEQ(cmp, m_builder.getInt32(0));
29552972
}
29562973
}
@@ -3010,11 +3027,9 @@ llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_bool()
30103027
return resolveFunction("value_assign_bool", llvm::FunctionType::get(m_builder.getVoidTy(), { m_valueDataType->getPointerTo(), m_builder.getInt1Ty() }, false));
30113028
}
30123029

3013-
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_cstring()
3030+
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_stringPtr()
30143031
{
3015-
return resolveFunction(
3016-
"value_assign_cstring",
3017-
llvm::FunctionType::get(m_builder.getVoidTy(), { m_valueDataType->getPointerTo(), llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0) }, false));
3032+
return resolveFunction("value_assign_stringPtr", llvm::FunctionType::get(m_builder.getVoidTy(), { m_valueDataType->getPointerTo(), m_stringPtrType->getPointerTo() }, false));
30183033
}
30193034

30203035
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_assign_special()
@@ -3043,22 +3058,25 @@ llvm::FunctionCallee LLVMCodeBuilder::resolve_value_toBool()
30433058
return callee;
30443059
}
30453060

3046-
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_toCString()
3061+
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_toStringPtr()
30473062
{
30483063
// NOTE: This function can't be marked read-only because it allocates on the heap
3049-
return resolveFunction("value_toCString", llvm::FunctionType::get(llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0), m_valueDataType->getPointerTo(), false));
3064+
return resolveFunction("value_toStringPtr", llvm::FunctionType::get(m_stringPtrType->getPointerTo(), m_valueDataType->getPointerTo(), false));
30503065
}
30513066

3052-
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_doubleToCString()
3067+
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_doubleToStringPtr()
30533068
{
30543069
// NOTE: This function can't be marked read-only because it allocates on the heap
3055-
return resolveFunction("value_doubleToCString", llvm::FunctionType::get(llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0), m_builder.getDoubleTy(), false));
3070+
return resolveFunction("value_doubleToStringPtr", llvm::FunctionType::get(m_stringPtrType->getPointerTo(), m_builder.getDoubleTy(), false));
30563071
}
30573072

3058-
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_boolToCString()
3073+
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_boolToStringPtr()
30593074
{
3060-
// NOTE: This function can't be marked read-only because it allocates on the heap
3061-
return resolveFunction("value_boolToCString", llvm::FunctionType::get(llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0), m_builder.getInt1Ty(), false));
3075+
// NOTE: This function can be marked read-only because it does NOT allocate on the heap ("true" and "false" constants)
3076+
llvm::FunctionCallee callee = resolveFunction("value_boolToStringPtr", llvm::FunctionType::get(m_stringPtrType->getPointerTo(), m_builder.getInt1Ty(), false));
3077+
llvm::Function *func = llvm::cast<llvm::Function>(callee.getCallee());
3078+
func->addFnAttr(llvm::Attribute::ReadOnly);
3079+
return callee;
30623080
}
30633081

30643082
llvm::FunctionCallee LLVMCodeBuilder::resolve_value_stringToDouble()
@@ -3158,7 +3176,7 @@ llvm::FunctionCallee LLVMCodeBuilder::resolve_list_alloc_size_ptr()
31583176
llvm::FunctionCallee LLVMCodeBuilder::resolve_list_to_string()
31593177
{
31603178
llvm::Type *pointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0);
3161-
llvm::FunctionCallee callee = resolveFunction("list_to_string", llvm::FunctionType::get(pointerType, { pointerType }, false));
3179+
llvm::FunctionCallee callee = resolveFunction("list_to_string", llvm::FunctionType::get(m_stringPtrType->getPointerTo(), { pointerType }, false));
31623180
llvm::Function *func = llvm::cast<llvm::Function>(callee.getCallee());
31633181
func->addFnAttr(llvm::Attribute::ReadOnly);
31643182
return callee;
@@ -3189,19 +3207,29 @@ llvm::FunctionCallee LLVMCodeBuilder::resolve_llvm_random_bool()
31893207
return resolveFunction("llvm_random_bool", llvm::FunctionType::get(m_builder.getDoubleTy(), { pointerType, m_builder.getInt1Ty(), m_builder.getInt1Ty() }, false));
31903208
}
31913209

3192-
llvm::FunctionCallee LLVMCodeBuilder::resolve_strcmp()
3210+
llvm::FunctionCallee LLVMCodeBuilder::resolve_string_pool_new()
31933211
{
3194-
llvm::Type *pointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0);
3195-
llvm::FunctionCallee callee = resolveFunction("strcmp", llvm::FunctionType::get(m_builder.getInt32Ty(), { pointerType, pointerType }, false));
3212+
return resolveFunction("string_pool_new", llvm::FunctionType::get(m_stringPtrType->getPointerTo(), false));
3213+
}
3214+
3215+
llvm::FunctionCallee LLVMCodeBuilder::resolve_string_pool_free()
3216+
{
3217+
return resolveFunction("string_pool_free", llvm::FunctionType::get(m_builder.getVoidTy(), { m_stringPtrType->getPointerTo() }, false));
3218+
}
3219+
3220+
llvm::FunctionCallee LLVMCodeBuilder::resolve_string_compare_case_sensitive()
3221+
{
3222+
llvm::Type *stringPtr = m_stringPtrType->getPointerTo();
3223+
llvm::FunctionCallee callee = resolveFunction("string_compare_case_sensitive", llvm::FunctionType::get(m_builder.getInt32Ty(), { stringPtr, stringPtr }, false));
31963224
llvm::Function *func = llvm::cast<llvm::Function>(callee.getCallee());
31973225
func->addFnAttr(llvm::Attribute::ReadOnly);
31983226
return callee;
31993227
}
32003228

3201-
llvm::FunctionCallee LLVMCodeBuilder::resolve_strcasecmp()
3229+
llvm::FunctionCallee LLVMCodeBuilder::resolve_string_compare_case_insensitive()
32023230
{
3203-
llvm::Type *pointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(m_llvmCtx), 0);
3204-
llvm::FunctionCallee callee = resolveFunction("strcasecmp", llvm::FunctionType::get(m_builder.getInt32Ty(), { pointerType, pointerType }, false));
3231+
llvm::Type *stringPtr = m_stringPtrType->getPointerTo();
3232+
llvm::FunctionCallee callee = resolveFunction("string_compare_case_insensitive", llvm::FunctionType::get(m_builder.getInt32Ty(), { stringPtr, stringPtr }, false));
32053233
llvm::Function *func = llvm::cast<llvm::Function>(callee.getCallee());
32063234
func->addFnAttr(llvm::Attribute::ReadOnly);
32073235
return callee;

src/engine/internal/llvm/llvmcodebuilder.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ class LLVMCodeBuilder : public ICodeBuilder
132132
LLVMRegister *addReg(std::shared_ptr<LLVMRegister> reg, std::shared_ptr<LLVMInstruction> ins);
133133

134134
llvm::Value *addAlloca(llvm::Type *type);
135-
void freeLater(llvm::Value *value);
135+
void freeStringLater(llvm::Value *value);
136136
void freeScopeHeap();
137137
llvm::Value *castValue(LLVMRegister *reg, Compiler::StaticType targetType);
138138
llvm::Value *castRawValue(LLVMRegister *reg, Compiler::StaticType targetType);
@@ -177,14 +177,14 @@ class LLVMCodeBuilder : public ICodeBuilder
177177
llvm::FunctionCallee resolve_value_assign_long();
178178
llvm::FunctionCallee resolve_value_assign_double();
179179
llvm::FunctionCallee resolve_value_assign_bool();
180-
llvm::FunctionCallee resolve_value_assign_cstring();
180+
llvm::FunctionCallee resolve_value_assign_stringPtr();
181181
llvm::FunctionCallee resolve_value_assign_special();
182182
llvm::FunctionCallee resolve_value_assign_copy();
183183
llvm::FunctionCallee resolve_value_toDouble();
184184
llvm::FunctionCallee resolve_value_toBool();
185-
llvm::FunctionCallee resolve_value_toCString();
186-
llvm::FunctionCallee resolve_value_doubleToCString();
187-
llvm::FunctionCallee resolve_value_boolToCString();
185+
llvm::FunctionCallee resolve_value_toStringPtr();
186+
llvm::FunctionCallee resolve_value_doubleToStringPtr();
187+
llvm::FunctionCallee resolve_value_boolToStringPtr();
188188
llvm::FunctionCallee resolve_value_stringToDouble();
189189
llvm::FunctionCallee resolve_value_stringToBool();
190190
llvm::FunctionCallee resolve_value_equals();
@@ -202,8 +202,10 @@ class LLVMCodeBuilder : public ICodeBuilder
202202
llvm::FunctionCallee resolve_llvm_random_double();
203203
llvm::FunctionCallee resolve_llvm_random_long();
204204
llvm::FunctionCallee resolve_llvm_random_bool();
205-
llvm::FunctionCallee resolve_strcmp();
206-
llvm::FunctionCallee resolve_strcasecmp();
205+
llvm::FunctionCallee resolve_string_pool_new();
206+
llvm::FunctionCallee resolve_string_pool_free();
207+
llvm::FunctionCallee resolve_string_compare_case_sensitive();
208+
llvm::FunctionCallee resolve_string_compare_case_insensitive();
207209

208210
Target *m_target = nullptr;
209211

@@ -222,6 +224,7 @@ class LLVMCodeBuilder : public ICodeBuilder
222224
llvm::Function *m_function = nullptr;
223225

224226
llvm::StructType *m_valueDataType = nullptr;
227+
llvm::StructType *m_stringPtrType = nullptr;
225228
llvm::FunctionType *m_resumeFuncType = nullptr;
226229

227230
std::vector<std::shared_ptr<LLVMInstruction>> m_instructions;
@@ -239,7 +242,7 @@ class LLVMCodeBuilder : public ICodeBuilder
239242
bool m_loopCondition = false; // whether we're currently compiling a loop condition
240243
std::vector<std::shared_ptr<LLVMInstruction>> m_variableInstructions;
241244
std::vector<std::shared_ptr<LLVMInstruction>> m_listInstructions;
242-
std::vector<std::vector<llvm::Value *>> m_heap; // scopes
245+
std::vector<std::vector<llvm::Value *>> m_stringHeap; // scopes
243246

244247
std::shared_ptr<ExecutableCode> m_output;
245248
};

0 commit comments

Comments
 (0)