@@ -16,7 +16,7 @@ defmodule CF.Sources.Fetcher do
1616 start: { __MODULE__ , :start_link , [ opts ] } ,
1717 type: :supervisor ,
1818 restart: :permanent ,
19- shutdown: 500
19+ shutdown: 2000
2020 }
2121 end
2222
@@ -60,15 +60,15 @@ defmodule CF.Sources.Fetcher do
6060
6161 def get_queue , do: Fetcher.LinkChecker . get_queue ( )
6262
63- @ url_regex ~r/ ^https?:\/ \/ [-a-zA-Z0-9@:%._\+ ~#=]{2,256}\. [a-z]{2,6}\b ([-a-zA-Z0-9@:%_\+ .~#?&\/ \/ =]*)/
64-
6563 defp fetch ( url , callback ) do
66- without_domain = Regex . replace ( @ url_regex , url , "\\ 1" )
67- path = Regex . replace ( ~r/ \? .+$/ , without_domain , "" )
64+ uri = URI . parse ( url )
65+
66+ case do_fetch_source_metadata ( url , MIME . from_path ( uri . path ) ) do
67+ { :error , err } ->
68+ :error
6869
69- case do_fetch_source_metadata ( url , MIME . from_path ( path ) ) do
70- { :error , _ } -> :error
71- { :ok , result } -> callback . ( result )
70+ { :ok , result } ->
71+ callback . ( result )
7272 end
7373 end
7474
@@ -77,13 +77,13 @@ defmodule CF.Sources.Fetcher do
7777 defp do_fetch_source_metadata ( url , mime_types ) when mime_types in @ fetchable_mime_types do
7878 case HTTPoison . get (
7979 url ,
80- [ ] ,
80+ [ { "User-Agent" , "CaptainFact/2.0" } ] ,
8181 follow_redirect: true ,
8282 max_redirect: 5 ,
8383 hackney: [ pool: pool_name ( ) ]
8484 ) do
8585 { :ok , % HTTPoison.Response { status_code: 200 , body: body } } ->
86- { :ok , source_params_from_tree ( Floki . parse_document ( body ) ) }
86+ { :ok , source_params_from_tree ( Floki . parse_document! ( body ) ) }
8787
8888 { :ok , % HTTPoison.Response { status_code: 404 } } ->
8989 { :error , :not_found }
0 commit comments