From 658738b02dcdbe10acdd6071c52690a56226646f Mon Sep 17 00:00:00 2001 From: Michael Price Date: Wed, 24 Jul 2019 19:57:46 +0100 Subject: [PATCH 1/2] Restrict repos to those we have push access to There may be repositories on the user's list that they don't have push access to. This will cause failures when retrieving statistics from them as the APIs require a higher level of permissions. --- get_traffic.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/get_traffic.py b/get_traffic.py index 50f73ee..cf0ac81 100644 --- a/get_traffic.py +++ b/get_traffic.py @@ -22,21 +22,26 @@ def updateUserStats(self): print(f">>> Getting repo details of user {user.name} with GitHub url : {user.html_url} ...") repos = user.get_repos() - results = [None] * repos.totalCount - idx = 0 threads = [] - for repo in repos: - # Spawn a thread for each repo - t = Thread(target=self.updateRepoStats, args=(repo,results,idx,)) + + # We're only able to retrieve repo statistics for those we can push to + repos = [repo for repo in repos if repo.permissions.push] + results = [None] * len(repos) + + for idx, repo in enumerate(repos): + # Spawn a thread for each repo + t = Thread(target=self.updateRepoStats, args=(repo, results, idx,)) t.start() threads.append(t) - idx += 1 # Wait for all threads to execute while len(threads): threads = [t for t in threads if t.is_alive()] - df = pd.read_csv(f'{self.user_name}.csv') if os.path.exists(f'{self.user_name}.csv') else pd.DataFrame(columns=['Repo', 'Views', 'Stars', 'Watching', 'Forks', 'Clones']) + if os.path.exists(f'{self.user_name}.csv'): + df = pd.read_csv(f'{self.user_name}.csv') + else: + df = pd.DataFrame(columns=['Repo', 'Views', 'Stars', 'Watching', 'Forks', 'Clones']) changed = False # Evaluate all repo results and update output CSV accordingly for row in results: From eec4d49f761f241b4074f008eb901518bfb3c203 Mon Sep 17 00:00:00 2001 From: Michael Price Date: Wed, 24 Jul 2019 20:02:29 +0100 Subject: [PATCH 
2/2] Switch to using full_name instead of repo name There are users that will have multiple repositories with the same name. The user or organization name will help keep them distinct. --- get_traffic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/get_traffic.py b/get_traffic.py index cf0ac81..298e7f2 100644 --- a/get_traffic.py +++ b/get_traffic.py @@ -65,7 +65,7 @@ def updateUserStats(self): def updateRepoStats(self, repo, results, idx): row = [ - repo.name, + repo.full_name, repo.get_views_traffic()['count'], repo.stargazers_count, repo.watchers_count,