Skip to content

Commit 96645de

Browse files
committed
Minor patch for sitemap parsing
1 parent f08f860 commit 96645de

File tree

3 files changed

+32
-16
lines changed

3 files changed

+32
-16
lines changed

data/txt/sha256sums.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ c1cb56f2a43e9f2f6b25d5f3d504e856ea21df6fc14af5e37b1000feef2bdb5a lib/core/optio
188188
48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3 lib/core/replication.py
189189
0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py
190190
888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py
191-
66cbde8c76851d5e8de6b93d6dbd3cd58e4473a1f307326aa672c3628253775e lib/core/settings.py
191+
8508162b2a95e54102ee8aec95888d7e2061d73b7d0e9ecd47d4f5e22ca94820 lib/core/settings.py
192192
cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82 lib/core/shell.py
193193
bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725 lib/core/subprocessng.py
194194
d35650179816193164a5f177102f18379dfbe6bb6d40fbb67b78d907b41c8038 lib/core/target.py
@@ -206,7 +206,7 @@ c5b258be7485089fac9d9cd179960e774fbd85e62836dc67cce76cc028bb6aeb lib/parse/hand
206206
1ad9054cd8476a520d4e2c141085ae45d94519df5c66f25fac41fe7d552ab952 lib/parse/html.py
207207
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/parse/__init__.py
208208
d2e771cdacef25ee3fdc0e0355b92e7cd1b68f5edc2756ffc19f75d183ba2c73 lib/parse/payloads.py
209-
80d26a30abe948faf817a14f746cc8b3e2341ea8286830cccaae253b8ac0cdff lib/parse/sitemap.py
209+
455ab0ec63e55cd56ce4a884b85bdc089223155008cab0f3696da5a33118f95b lib/parse/sitemap.py
210210
1be3da334411657461421b8a26a0f2ff28e1af1e28f1e963c6c92768f9b0847c lib/request/basicauthhandler.py
211211
a1c638493ecdc5194db7186bbfed815c6eed2344f2607cac8c9fa50534824266 lib/request/basic.py
212212
bc61bc944b81a7670884f82231033a6ac703324b34b071c9834886a92e249d0e lib/request/chunkedhandler.py

lib/core/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from thirdparty import six
2121

2222
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
23-
VERSION = "1.10.2.2"
23+
VERSION = "1.10.2.3"
2424
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
2525
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
2626
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)

lib/parse/sitemap.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
abortedFlag = None
1919

20-
def parseSitemap(url, retVal=None):
20+
def parseSitemap(url, retVal=None, visited=None):
2121
global abortedFlag
2222

2323
if retVal is not None:
@@ -27,25 +27,41 @@ def parseSitemap(url, retVal=None):
2727
if retVal is None:
2828
abortedFlag = False
2929
retVal = OrderedSet()
30+
visited = set()
31+
32+
if url in visited:
33+
return retVal
34+
35+
visited.add(url)
3036

3137
try:
3238
content = Request.getPage(url=url, raise404=True)[0] if not abortedFlag else ""
3339
except _http_client.InvalidURL:
3440
errMsg = "invalid URL given for sitemap ('%s')" % url
3541
raise SqlmapSyntaxException(errMsg)
3642

37-
for match in re.finditer(r"<loc>\s*([^<]+)", content or ""):
38-
if abortedFlag:
39-
break
40-
url = match.group(1).strip()
41-
if url.endswith(".xml") and "sitemap" in url.lower():
42-
if kb.followSitemapRecursion is None:
43-
message = "sitemap recursion detected. Do you want to follow? [y/N] "
44-
kb.followSitemapRecursion = readInput(message, default='N', boolean=True)
45-
if kb.followSitemapRecursion:
46-
parseSitemap(url, retVal)
47-
else:
48-
retVal.add(url)
43+
if content:
44+
content = re.sub(r"<!--.*?-->", "", content, flags=re.DOTALL)
45+
46+
for match in re.finditer(r"<\w*?loc[^>]*>\s*([^<]+)", content, re.I):
47+
if abortedFlag:
48+
break
49+
50+
foundUrl = match.group(1).strip()
51+
52+
# Basic validation to avoid junk
53+
if not foundUrl.startswith("http"):
54+
continue
55+
56+
if foundUrl.endswith(".xml") and "sitemap" in foundUrl.lower():
57+
if kb.followSitemapRecursion is None:
58+
message = "sitemap recursion detected. Do you want to follow? [y/N] "
59+
kb.followSitemapRecursion = readInput(message, default='N', boolean=True)
60+
61+
if kb.followSitemapRecursion:
62+
parseSitemap(foundUrl, retVal, visited)
63+
else:
64+
retVal.add(foundUrl)
4965

5066
except KeyboardInterrupt:
5167
abortedFlag = True

0 commit comments

Comments
 (0)