Skip to content

Commit bae4aca

Browse files
committed
Python: Fix bad join in StrConst::isUnicode
Also fixes a bug ("`B`" was not recognised as a bytestring prefix). The basic idea behind this fix is that the set of possible prefixes is fairly small, so it's easier just to precompute them, and then join them with the entire prefix of the string in question (rather than look at each string in isolation, get its prefix, and _then_ check whether it looks like it's a unicode string prefix, which essentially is what the code did before).
1 parent 1251bc5 commit bae4aca

File tree

1 file changed

+29
-7
lines changed

1 file changed

+29
-7
lines changed

python/ql/src/semmle/python/Exprs.qll

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -584,18 +584,40 @@ class Slice extends Slice_ {
584584
}
585585
}
586586

587+
/**
588+
* Gets a string prefix in the database that is explicitly marked as a Unicode string.
589+
*
590+
* Helper predicate for `StrConst::isUnicode`.
591+
*/
592+
pragma[nomagic]
593+
private string unicode_prefix() {
594+
result = any(Str_ s).getPrefix() and
595+
result.charAt(_) in ["u", "U"]
596+
}
597+
598+
/**
599+
* Gets a string prefix in the database that is _not_ explicitly marked as a bytestring.
600+
*
601+
* Helper predicate for `StrConst::isUnicode`.
602+
*/
603+
pragma[nomagic]
604+
private string non_byte_prefix() {
605+
result = any(Str_ s).getPrefix() and
606+
not result.charAt(_) in ["b", "B"]
607+
}
608+
587609
/** A string constant. */
588610
class StrConst extends Str_, ImmutableLiteral {
589611
/* syntax: "hello" */
590612
predicate isUnicode() {
591-
this.getPrefix().charAt(_) = "u"
592-
or
593-
this.getPrefix().charAt(_) = "U"
594-
or
595-
not this.getPrefix().charAt(_) = "b" and major_version() = 3
613+
this.getPrefix() = unicode_prefix()
596614
or
597-
not this.getPrefix().charAt(_) = "b" and
598-
this.getEnclosingModule().hasFromFuture("unicode_literals")
615+
this.getPrefix() = non_byte_prefix() and
616+
(
617+
major_version() = 3
618+
or
619+
this.getEnclosingModule().hasFromFuture("unicode_literals")
620+
)
599621
}
600622

601623
deprecated override string strValue() { result = this.getS() }

0 commit comments

Comments
 (0)