Skip to content

Commit 5327ca7

Browse files
authored
Merge pull request #812 from jbj/ir-backedge
C++: IR back-edge detection based on TranslatedStmt
2 parents ab1f96f + b55573e commit 5327ca7

File tree

27 files changed

+756
-125
lines changed

27 files changed

+756
-125
lines changed

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ class IRBlock extends IRBlockBase {
9090
blockSuccessor(this, result, kind)
9191
}
9292

93+
final IRBlock getBackEdgeSuccessor(EdgeKind kind) {
94+
backEdgeSuccessor(this, result, kind)
95+
}
96+
9397
final predicate immediatelyDominates(IRBlock block) {
9498
blockImmediatelyDominates(this, block)
9599
}
@@ -132,7 +136,10 @@ private predicate startsBasicBlock(Instruction instr) {
132136
exists(Instruction predecessor, EdgeKind kind |
133137
instr = predecessor.getSuccessor(kind) and
134138
not kind instanceof GotoEdge
135-
) // Incoming edge is not a GotoEdge
139+
) or // Incoming edge is not a GotoEdge
140+
exists(Instruction predecessor |
141+
instr = Construction::getInstructionBackEdgeSuccessor(predecessor, _)
142+
) // A back edge enters this instruction
136143
)
137144
}
138145

@@ -184,6 +191,41 @@ private cached module Cached {
184191
)
185192
}
186193

194+
cached predicate backEdgeSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
195+
backEdgeSuccessorRaw(pred, succ, kind)
196+
or
197+
forwardEdgeRaw+(pred, pred) and
198+
blockSuccessor(pred, succ, kind)
199+
}
200+
201+
/**
202+
* Holds if there is an edge from `pred` to `succ` that is not a back edge.
203+
*/
204+
private predicate forwardEdgeRaw(TIRBlock pred, TIRBlock succ) {
205+
exists(EdgeKind kind |
206+
blockSuccessor(pred, succ, kind) and
207+
not backEdgeSuccessorRaw(pred, succ, kind)
208+
)
209+
}
210+
211+
/**
212+
* Holds if the `kind`-edge from `pred` to `succ` is a back edge according to
213+
* `Construction`.
214+
*
215+
* There could be loops of non-back-edges if there is a flaw in the IR
216+
* construction or back-edge detection, and this could cause non-termination
217+
* of subsequent analysis. To prevent that, a subsequent predicate further
218+
* classifies all edges as back edges if they are involved in a loop of
219+
* non-back-edges.
220+
*/
221+
private predicate backEdgeSuccessorRaw(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
222+
exists(Instruction predLast, Instruction succFirst |
223+
predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
224+
succFirst = Construction::getInstructionBackEdgeSuccessor(predLast, kind) and
225+
succ = MkIRBlock(succFirst)
226+
)
227+
}
228+
187229
cached predicate blockSuccessor(TIRBlock pred, TIRBlock succ) {
188230
blockSuccessor(pred, succ, _)
189231
}

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,54 @@ module InstructionSanity {
157157
blockCount = count(instr.getBlock()) and
158158
blockCount != 1
159159
}
160+
161+
private predicate forwardEdge(IRBlock b1, IRBlock b2) {
162+
b1.getASuccessor() = b2 and
163+
not b1.getBackEdgeSuccessor(_) = b2
164+
}
165+
166+
/**
167+
* Holds if `f` contains a loop in which no edge is a back edge.
168+
*
169+
* This check ensures we don't have too _few_ back edges.
170+
*/
171+
query predicate containsLoopOfForwardEdges(FunctionIR f) {
172+
exists(IRBlock block |
173+
forwardEdge+(block, block) and
174+
block.getFunctionIR() = f
175+
)
176+
}
177+
178+
/**
179+
* Holds if `block` is reachable from its function entry point but would not
180+
* be reachable by traversing only forward edges. This check is skipped for
181+
* functions containing `goto` statements as the property does not generally
182+
* hold there.
183+
*
184+
* This check ensures we don't have too _many_ back edges.
185+
*/
186+
query predicate lostReachability(IRBlock block) {
187+
exists(FunctionIR f, IRBlock entry |
188+
entry = f.getEntryBlock() and
189+
entry.getASuccessor+() = block and
190+
not forwardEdge+(entry, block) and
191+
not exists(GotoStmt s | s.getEnclosingFunction() = f.getFunction())
192+
)
193+
}
194+
195+
/**
196+
* Holds if the number of back edges differs between the `Instruction` graph
197+
* and the `IRBlock` graph.
198+
*/
199+
query predicate backEdgeCountMismatch(Function f, int fromInstr, int fromBlock) {
200+
fromInstr = count(Instruction i1, Instruction i2 |
201+
i1.getFunction() = f and i1.getBackEdgeSuccessor(_) = i2
202+
) and
203+
fromBlock = count(IRBlock b1, IRBlock b2 |
204+
b1.getFunction() = f and b1.getBackEdgeSuccessor(_) = b2
205+
) and
206+
fromInstr != fromBlock
207+
}
160208
}
161209

162210
/**
@@ -489,6 +537,24 @@ class Instruction extends Construction::TInstruction {
489537
result = Construction::getInstructionSuccessor(this, kind)
490538
}
491539

540+
/**
541+
* Gets the a _back-edge successor_ of this instruction along the control
542+
* flow edge specified by `kind`. A back edge in the control-flow graph is
543+
* intuitively the edge that goes back around a loop. If all back edges are
544+
* removed from the control-flow graph, it becomes acyclic.
545+
*/
546+
final Instruction getBackEdgeSuccessor(EdgeKind kind) {
547+
// We don't take these edges from
548+
// `Construction::getInstructionBackEdgeSuccessor` since that relation has
549+
// not been treated to remove any loops that might be left over due to
550+
// flaws in the IR construction or back-edge detection.
551+
exists(IRBlock block |
552+
block = this.getBlock() and
553+
this = block.getLastInstruction() and
554+
result = block.getBackEdgeSuccessor(kind).getFirstInstruction()
555+
)
556+
}
557+
492558
/**
493559
* Gets all direct successors of this instruction.
494560
*/

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,9 @@ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key,
287287
(
288288
(
289289
key = "semmle.label" and
290-
value = kind.toString()
290+
if predBlock.getBackEdgeSuccessor(kind) = succBlock
291+
then value = kind.toString() + " (back edge)"
292+
else value = kind.toString()
291293
) or
292294
(
293295
key = "semmle.order" and

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,23 @@ cached private module Cached {
279279
)
280280
}
281281

282+
cached Instruction getInstructionBackEdgeSuccessor(Instruction instruction, EdgeKind kind) {
283+
exists(OldInstruction oldInstruction |
284+
not Reachability::isInfeasibleInstructionSuccessor(oldInstruction, kind) and
285+
// There is only one case for the translation into `result` because the
286+
// SSA construction never inserts extra instructions _before_ an existing
287+
// instruction.
288+
getOldInstruction(result) = oldInstruction.getBackEdgeSuccessor(kind) and
289+
// There are two cases for the translation into `instruction` because the
290+
// SSA construction might have inserted a chi node _after_
291+
// `oldInstruction`, in which case the back edge should come out of the
292+
// chi node instead.
293+
if hasChiNode(_, oldInstruction)
294+
then instruction = getChiInstruction(oldInstruction)
295+
else instruction = getNewInstruction(oldInstruction)
296+
)
297+
}
298+
282299
cached IRVariable getInstructionVariable(Instruction instruction) {
283300
result = getNewIRVariable(getOldInstruction(instruction).(OldIR::VariableInstruction).getVariable())
284301
}

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ class IRBlock extends IRBlockBase {
9090
blockSuccessor(this, result, kind)
9191
}
9292

93+
final IRBlock getBackEdgeSuccessor(EdgeKind kind) {
94+
backEdgeSuccessor(this, result, kind)
95+
}
96+
9397
final predicate immediatelyDominates(IRBlock block) {
9498
blockImmediatelyDominates(this, block)
9599
}
@@ -132,7 +136,10 @@ private predicate startsBasicBlock(Instruction instr) {
132136
exists(Instruction predecessor, EdgeKind kind |
133137
instr = predecessor.getSuccessor(kind) and
134138
not kind instanceof GotoEdge
135-
) // Incoming edge is not a GotoEdge
139+
) or // Incoming edge is not a GotoEdge
140+
exists(Instruction predecessor |
141+
instr = Construction::getInstructionBackEdgeSuccessor(predecessor, _)
142+
) // A back edge enters this instruction
136143
)
137144
}
138145

@@ -184,6 +191,41 @@ private cached module Cached {
184191
)
185192
}
186193

194+
cached predicate backEdgeSuccessor(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
195+
backEdgeSuccessorRaw(pred, succ, kind)
196+
or
197+
forwardEdgeRaw+(pred, pred) and
198+
blockSuccessor(pred, succ, kind)
199+
}
200+
201+
/**
202+
* Holds if there is an edge from `pred` to `succ` that is not a back edge.
203+
*/
204+
private predicate forwardEdgeRaw(TIRBlock pred, TIRBlock succ) {
205+
exists(EdgeKind kind |
206+
blockSuccessor(pred, succ, kind) and
207+
not backEdgeSuccessorRaw(pred, succ, kind)
208+
)
209+
}
210+
211+
/**
212+
* Holds if the `kind`-edge from `pred` to `succ` is a back edge according to
213+
* `Construction`.
214+
*
215+
* There could be loops of non-back-edges if there is a flaw in the IR
216+
* construction or back-edge detection, and this could cause non-termination
217+
* of subsequent analysis. To prevent that, a subsequent predicate further
218+
* classifies all edges as back edges if they are involved in a loop of
219+
* non-back-edges.
220+
*/
221+
private predicate backEdgeSuccessorRaw(TIRBlock pred, TIRBlock succ, EdgeKind kind) {
222+
exists(Instruction predLast, Instruction succFirst |
223+
predLast = getInstruction(pred, getInstructionCount(pred) - 1) and
224+
succFirst = Construction::getInstructionBackEdgeSuccessor(predLast, kind) and
225+
succ = MkIRBlock(succFirst)
226+
)
227+
}
228+
187229
cached predicate blockSuccessor(TIRBlock pred, TIRBlock succ) {
188230
blockSuccessor(pred, succ, _)
189231
}

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,54 @@ module InstructionSanity {
157157
blockCount = count(instr.getBlock()) and
158158
blockCount != 1
159159
}
160+
161+
private predicate forwardEdge(IRBlock b1, IRBlock b2) {
162+
b1.getASuccessor() = b2 and
163+
not b1.getBackEdgeSuccessor(_) = b2
164+
}
165+
166+
/**
167+
* Holds if `f` contains a loop in which no edge is a back edge.
168+
*
169+
* This check ensures we don't have too _few_ back edges.
170+
*/
171+
query predicate containsLoopOfForwardEdges(FunctionIR f) {
172+
exists(IRBlock block |
173+
forwardEdge+(block, block) and
174+
block.getFunctionIR() = f
175+
)
176+
}
177+
178+
/**
179+
* Holds if `block` is reachable from its function entry point but would not
180+
* be reachable by traversing only forward edges. This check is skipped for
181+
* functions containing `goto` statements as the property does not generally
182+
* hold there.
183+
*
184+
* This check ensures we don't have too _many_ back edges.
185+
*/
186+
query predicate lostReachability(IRBlock block) {
187+
exists(FunctionIR f, IRBlock entry |
188+
entry = f.getEntryBlock() and
189+
entry.getASuccessor+() = block and
190+
not forwardEdge+(entry, block) and
191+
not exists(GotoStmt s | s.getEnclosingFunction() = f.getFunction())
192+
)
193+
}
194+
195+
/**
196+
* Holds if the number of back edges differs between the `Instruction` graph
197+
* and the `IRBlock` graph.
198+
*/
199+
query predicate backEdgeCountMismatch(Function f, int fromInstr, int fromBlock) {
200+
fromInstr = count(Instruction i1, Instruction i2 |
201+
i1.getFunction() = f and i1.getBackEdgeSuccessor(_) = i2
202+
) and
203+
fromBlock = count(IRBlock b1, IRBlock b2 |
204+
b1.getFunction() = f and b1.getBackEdgeSuccessor(_) = b2
205+
) and
206+
fromInstr != fromBlock
207+
}
160208
}
161209

162210
/**
@@ -489,6 +537,24 @@ class Instruction extends Construction::TInstruction {
489537
result = Construction::getInstructionSuccessor(this, kind)
490538
}
491539

540+
/**
541+
* Gets the a _back-edge successor_ of this instruction along the control
542+
* flow edge specified by `kind`. A back edge in the control-flow graph is
543+
* intuitively the edge that goes back around a loop. If all back edges are
544+
* removed from the control-flow graph, it becomes acyclic.
545+
*/
546+
final Instruction getBackEdgeSuccessor(EdgeKind kind) {
547+
// We don't take these edges from
548+
// `Construction::getInstructionBackEdgeSuccessor` since that relation has
549+
// not been treated to remove any loops that might be left over due to
550+
// flaws in the IR construction or back-edge detection.
551+
exists(IRBlock block |
552+
block = this.getBlock() and
553+
this = block.getLastInstruction() and
554+
result = block.getBackEdgeSuccessor(kind).getFirstInstruction()
555+
)
556+
}
557+
492558
/**
493559
* Gets all direct successors of this instruction.
494560
*/

cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,9 @@ query predicate edges(PrintableIRBlock pred, PrintableIRBlock succ, string key,
287287
(
288288
(
289289
key = "semmle.label" and
290-
value = kind.toString()
290+
if predBlock.getBackEdgeSuccessor(kind) = succBlock
291+
then value = kind.toString() + " (back edge)"
292+
else value = kind.toString()
291293
) or
292294
(
293295
key = "semmle.order" and

0 commit comments

Comments
 (0)