1 | 1 | defmodule CodeCorps.GitHub.EagerAPI do |
2 | 2 | @moduledoc """ |
3 | | - This module attempts to implement eager loading of a resource, by trying to |
4 | | - fetch all of its pages in parallel. |
5 | | -
6 | | - This should technically be faster than lazy loading. However, it fails due to |
7 | | - timeout errors, even when loading just two pages. |
8 | | -
9 | | - The assumption is that hackney needs to be configured to allow multiple |
10 | | - requests. |
| 3 | + Eager loads a resource from the GitHub API by fetching all of its pages in |
| 4 | + parallel. |
11 | 5 | """ |
12 | 6 |
13 | | - def eager_get_all(url, headers, options) do |
| 7 | + def get_all(url, headers, options) do |
14 | 8 | HTTPoison.start |
15 | 9 | {:ok, response} = HTTPoison.get(url, headers, options) |
16 | 10 |
@@ -43,43 +37,47 @@ defmodule CodeCorps.GitHub.EagerAPI do |
43 | 37 | end |
44 | 38 |
45 | 39 | defp extract_total_pages(links_string) do |
46 | | - # Unfortunately, the paginating info we get from GitHub's responses is not |
47 | | - # suitable for easy extraction. |
| 40 | + # We use regular expressions to parse the pagination info from the "Link"
| 41 | + # header of the GitHub API response.
48 | 42 | # |
49 | | - # The information is stored in the following response header: |
| 43 | + # The "Link" response header lists the pages in the following format:
50 | 44 | # |
51 | 45 | # ``` |
52 | 46 | # {"Link", '<https://api.github.com/search/code?q=addClass+user%3Amozilla&page=15>; rel="next", |
53 | 47 | # <https://api.github.com/search/code?q=addClass+user%3Amozilla&page=34>; rel="last", |
54 | 48 | # <https://api.github.com/search/code?q=addClass+user%3Amozilla&page=1>; rel="first", |
55 | 49 | # <https://api.github.com/search/code?q=addClass+user%3Amozilla&page=13>; rel="prev"' |
56 | | - # |
57 | 50 | # ``` |
58 | 51 | # |
59 | | - # If the response has no list header, then that means we got all the records |
60 | | - # and there's just that one page. |
| 52 | + # If the response has no "Link" header, then we have received all the records
| 53 | + # and there is only a single page.
61 | 54 | # |
62 | 55 | # If the response has a "Link" header, the value will contain at least the
63 | 56 | # "last" relation. |
64 | | - # |
65 | | - # Unfortunatly, the only way to parse it is via regex. |
66 | 57 | links_string |
67 | 58 | |> String.split(", ") |
68 | 59 | |> Enum.map(fn link -> |
69 | | - # Searches for `rel=` |
70 | | - rel = Regex.run(~r{rel="([a-z]+)"}, link) |> List.last |
71 | | - # Searches for the following variations: |
72 | | - # ``` |
73 | | - # ?page={match}> |
74 | | - # ?page={match}&... |
75 | | - # &page={match}> |
76 | | - # &page={match}&... |
77 | | - # ``` |
78 | | - page = Regex.run(~r{[&/?]page=([^>&]+)}, link) |> List.last |> String.to_integer |
79 | | - |
| 60 | + rel = get_rel(link) |
| 61 | + page = get_page(link) |
80 | 62 | {rel, page} |
81 | 63 | end) |
82 | 64 | |> Enum.into(%{}) |
83 | 65 | |> Map.get("last") |
84 | 66 | end |
| 67 | + |
| 68 | + defp get_rel(link) do |
| 69 | + # Extracts the relation name from `rel="..."`
| 70 | + Regex.run(~r{rel="([a-z]+)"}, link) |> List.last() |
| 71 | + end |
| 72 | + |
| 73 | + defp get_page(link) do |
| 74 | + # Searches for the following variations: |
| 75 | + # ``` |
| 76 | + # ?page={match}> |
| 77 | + # ?page={match}&... |
| 78 | + # &page={match}> |
| 79 | + # &page={match}&... |
| 80 | + # ``` |
| 81 | + Regex.run(~r{[&/?]page=([^>&]+)}, link) |> List.last() |> String.to_integer()
| 82 | + end |
85 | 83 | end |
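
The body of `get_all/3` is elided by the hunk above, so the exact fan-out strategy is not visible in this diff. A minimal sketch of the parallel page fetching the moduledoc describes might look like the following, assuming pagination is driven by a `page` query parameter; `fetch_remaining_pages/4` is a hypothetical helper, not part of the commit. The removed moduledoc's note about timeout failures suggests a dedicated hackney pool (passed through HTTPoison's `hackney: [pool: ...]` option) and a generous `:timeout` may also be needed.

```elixir
defmodule CodeCorps.GitHub.EagerAPISketch do
  @moduledoc """
  Hypothetical sketch only; the real fetch logic is elided from the diff.
  """

  # Nothing left to fetch when the first page is also the last one.
  def fetch_remaining_pages(_url, _headers, _options, 1), do: []

  # Fetches pages 2..last_page concurrently and returns the raw response
  # bodies in page order (Task.async_stream preserves input order).
  def fetch_remaining_pages(url, headers, options, last_page) do
    2..last_page
    |> Task.async_stream(
      fn page ->
        # Assumes `options` is a keyword list with no conflicting :params entry.
        page_options = Keyword.put(options, :params, [page: page])
        {:ok, %HTTPoison.Response{body: body}} = HTTPoison.get(url, headers, page_options)
        body
      end,
      max_concurrency: 4,
      timeout: 30_000
    )
    |> Enum.map(fn {:ok, body} -> body end)
  end
end
```

The first page is fetched as shown in the diff; its body would be decoded and merged with these results by the caller.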
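For reference, here is the "Link"-header parsing from `extract_total_pages/1` applied to the sample value quoted in the comment above. The helpers are private, so this standalone snippet simply repeats the same regexes against the header value as a plain binary:

```elixir
links_string =
  ~s(<https://api.github.com/search/code?q=addClass+user%3Amozilla&page=15>; rel="next", ) <>
    ~s(<https://api.github.com/search/code?q=addClass+user%3Amozilla&page=34>; rel="last", ) <>
    ~s(<https://api.github.com/search/code?q=addClass+user%3Amozilla&page=1>; rel="first", ) <>
    ~s(<https://api.github.com/search/code?q=addClass+user%3Amozilla&page=13>; rel="prev")

last_page =
  links_string
  |> String.split(", ")
  |> Enum.map(fn link ->
    rel = Regex.run(~r{rel="([a-z]+)"}, link) |> List.last()
    page = Regex.run(~r{[&/?]page=([^>&]+)}, link) |> List.last() |> String.to_integer()
    {rel, page}
  end)
  |> Enum.into(%{})
  |> Map.get("last")

# last_page == 34
```

Because the "last" relation is always present when a "Link" header is returned, `Map.get("last")` yields the total page count; when no "Link" header is present, the comment above notes there is only a single page.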