From e7700c2881255821b2e8ebe010c28d755363ce1c Mon Sep 17 00:00:00 2001 From: shbhmexe Date: Wed, 17 Dec 2025 22:57:00 +0530 Subject: [PATCH] fix(database,logparser): handle single-label domains + CommitDate parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `-m` (map unknowns to email domain) mode could crash when encountering single-label domains (for example: user@localhost). The crash happens because MapToEmployer() references `addr` even when the domain split loop never runs. Fix this by: - Keeping a safe default `addr` when the domain has no dots. - Making GetHackerDomain() generate a meaningful label for single-label domains (so it won’t collapse to just " *"). Also make logparser's date parsing ignore the `CommitDate:` prefix, which can appear in git logs produced with formats like `--pretty=fuller`, preventing ValueError crashes during date range filtering. Signed-off-by: shbhmexe <shubhushukla586@gmail.com> --- src/database.py | 11 +++++++++-- src/logparser.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/database.py b/src/database.py index b9e751a5..975b32d8 100755 --- a/src/database.py +++ b/src/database.py @@ -396,7 +396,11 @@ def AddEmailEmployerMapping (email, employer, end = nextyear, domain = False): # LG: Artificial Domains from Hacker's email domain names ArtificialDomains = {} def GetHackerDomain(dom, email): - new_dom = ''.join(map(lambda x: x.lower().capitalize(), dom.split('.')[:-1])) + parts = dom.split('.') + # If the domain has no dot (e.g. "localhost"), keep the whole value so we + # generate a meaningful label instead of just " *". 
+ name_parts = parts if len(parts) == 1 else parts[:-1] + new_dom = ''.join(map(lambda x: x.lower().capitalize(), name_parts)) new_dom += ' *' key = (new_dom, dom) if key not in ArtificialDomains: @@ -420,7 +424,10 @@ def MapToEmployer (email, unknown = 0): if len (namedom) < 2: print 'Oops...funky email %s' % email_encode(email) return [(nextyear, GetEmployer ('Funky'), False)] - s = namedom[1].split ('.') + domain = namedom[1] + s = domain.split ('.') + # Keep a usable default for unknown==1 even when the domain has no dots. + addr = domain for dots in range (len (s) - 2, -1, -1): addr = '.'.join (s[dots:]) try: diff --git a/src/logparser.py b/src/logparser.py index 88cd518e..e0199808 100755 --- a/src/logparser.py +++ b/src/logparser.py @@ -63,7 +63,7 @@ def getDate(self, line): arr2 = [] for i in range(len(arr) - 1): s = arr[i] - if s != '' and s != 'Date:': + if s and s not in ('Date:', 'CommitDate:'): arr2.append(s) datestr = ' '.join(arr2) date = datetime.datetime.strptime(datestr, '%a %b %d %H:%M:%S %Y')