Skip to content

Commit 531dced

Browse files
authored
feat(auto_source): add support for auto_source feature (#676)
1 parent 8eeb9e6 commit 531dced

22 files changed

+1093
-40
lines changed

Gemfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,21 @@ source 'https://rubygems.org'
44

55
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
66

7-
gem 'html2rss'
7+
gem 'html2rss', '~> 0.14'
88
gem 'html2rss-configs', github: 'html2rss/html2rss-configs'
99

1010
# Use these instead of the two above (uncomment them) when developing locally:
1111
# gem 'html2rss', path: '../html2rss'
1212
# gem 'html2rss-configs', path: '../html2rss-configs'
1313

14+
gem 'base64'
1415
gem 'erubi'
1516
gem 'parallel'
1617
gem 'rack-cache'
1718
gem 'rack-timeout'
1819
gem 'rack-unreloader'
1920
gem 'roda'
21+
gem 'ssrf_filter'
2022
gem 'tilt'
2123

2224
gem 'puma', require: false
@@ -33,7 +35,10 @@ group :development do
3335
end
3436

3537
group :test do
38+
gem 'climate_control'
39+
gem 'rack-test'
3640
gem 'rspec'
3741
gem 'simplecov', require: false
3842
gem 'vcr'
43+
gem 'webmock'
3944
end

Gemfile.lock

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,14 @@ GEM
1111
addressable (2.8.7)
1212
public_suffix (>= 2.0.2, < 7.0)
1313
ast (2.4.2)
14+
base64 (0.2.0)
15+
bigdecimal (3.1.8)
1416
byebug (11.1.3)
17+
climate_control (1.2.0)
1518
concurrent-ruby (1.3.4)
19+
crack (1.0.0)
20+
bigdecimal
21+
rexml
1622
crass (1.0.6)
1723
diff-lcs (1.5.1)
1824
docile (1.4.1)
@@ -25,6 +31,7 @@ GEM
2531
faraday (>= 1, < 3)
2632
faraday-net_http (3.3.0)
2733
net-http
34+
hashdiff (1.1.1)
2835
html2rss (0.14.0)
2936
addressable (~> 2.7)
3037
faraday (> 2.0.1, < 3.0)
@@ -75,6 +82,8 @@ GEM
7582
rack (3.1.7)
7683
rack-cache (1.17.0)
7784
rack (>= 0.4)
85+
rack-test (2.1.0)
86+
rack (>= 1.3)
7887
rack-timeout (0.7.0)
7988
rack-unreloader (2.1.0)
8089
rainbow (3.1.1)
@@ -132,13 +141,18 @@ GEM
132141
simplecov_json_formatter (~> 0.1)
133142
simplecov-html (0.12.3)
134143
simplecov_json_formatter (0.1.4)
144+
ssrf_filter (1.1.2)
135145
thor (1.3.2)
136146
tilt (2.4.0)
137147
tzinfo (2.0.6)
138148
concurrent-ruby (~> 1.0)
139149
unicode-display_width (2.5.0)
140150
uri (0.13.1)
141151
vcr (6.2.0)
152+
webmock (3.24.0)
153+
addressable (>= 2.8.0)
154+
crack (>= 0.3.2)
155+
hashdiff (>= 0.4.0, < 2.0.0)
142156
yard (0.9.36)
143157
zeitwerk (2.6.18)
144158

@@ -151,13 +165,16 @@ PLATFORMS
151165
x86_64-linux
152166

153167
DEPENDENCIES
168+
base64
154169
byebug
170+
climate_control
155171
erubi
156-
html2rss
172+
html2rss (~> 0.14)
157173
html2rss-configs!
158174
parallel
159175
puma
160176
rack-cache
177+
rack-test
161178
rack-timeout
162179
rack-unreloader
163180
rake
@@ -169,8 +186,10 @@ DEPENDENCIES
169186
rubocop-rspec
170187
rubocop-thread_safety
171188
simplecov
189+
ssrf_filter
172190
tilt
173191
vcr
192+
webmock
174193
yard
175194

176195
BUNDLED WITH

README.md

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,16 @@ services:
4545
target: /app/config/feeds.yml
4646
read_only: true
4747
environment:
48-
- RACK_ENV=production
49-
- HEALTH_CHECK_USERNAME=health
50-
- HEALTH_CHECK_PASSWORD=please-set-YOUR-OWN-veeeeeery-l0ng-aNd-h4rd-to-gue55-Passw0rd!
48+
RACK_ENV: production
49+
HEALTH_CHECK_USERNAME: health
50+
HEALTH_CHECK_PASSWORD: please-set-YOUR-OWN-veeeeeery-l0ng-aNd-h4rd-to-gue55-Passw0rd!
51+
# AUTO_SOURCE_ENABLED: true
52+
# AUTO_SOURCE_USERNAME: foobar
53+
# AUTO_SOURCE_PASSWORD: A-Unique-And-Long-Password-For-Your-Own-Instance
54+
## to allow just requests originating from the local host
55+
# AUTO_SOURCE_ALLOWED_ORIGINS: 127.0.0.1:3000
56+
## to allow multiple origins, separate those via comma:
57+
# AUTO_SOURCE_ALLOWED_ORIGINS: example.com,h2r.host.tld
5158
watchtower:
5259
image: containrrr/watchtower
5360
volumes:
@@ -66,6 +73,31 @@ The [watchtower](https://containrrr.dev/watchtower/) service automatically pulls
6673

6774
The `docker-compose.yml` above contains a service description for watchtower.
6875

76+
## How to use automatic feed generation
77+
78+
> [!NOTE]
79+
> This feature is disabled by default.
80+
81+
To enable the `auto_source` feature, uncomment the env variables in the `docker-compose.yml` file from above and change the values accordingly:
82+
83+
```yaml
84+
environment:
85+
## … snip ✁
86+
AUTO_SOURCE_ENABLED: true
87+
AUTO_SOURCE_USERNAME: foobar
88+
AUTO_SOURCE_PASSWORD: A-Unique-And-Long-Password-For-Your-Own-Instance
89+
## to allow just requests originating from the local host
90+
AUTO_SOURCE_ALLOWED_ORIGINS: 127.0.0.1:3000
91+
## to allow multiple origins, separate those via comma:
92+
# AUTO_SOURCE_ALLOWED_ORIGINS: example.com,h2r.host.tld
93+
## … snap ✃
94+
```
95+
96+
Restart the container and open <http://127.0.0.1:3000/auto_source>.
97+
When asked, enter your username and password.
98+
99+
Then enter the URL of a website and click on the _Generate_ button.
100+
69101
## How to use the included configs
70102

71103
html2rss-web comes with many feed configs out of the box. [See the file list of all configs.](https://github.com/html2rss/html2rss-configs/tree/master/lib/html2rss/configs)
@@ -85,7 +117,7 @@ To build your own RSS feed, you need to create a _feed config_.\
85117
That _feed config_ goes into the file `feeds.yml`.\
86118
Check out the [`example` feed config](https://github.com/html2rss/html2rss-web/blob/master/config/feeds.yml#L9).
87119

88-
Please refer to [html2rss' README for a description of _the feed config and its options_](https://github.com/html2rss/html2rss#the-feed-config-and-its-options). html2rss-web is just a small web application that depends on html2rss.
120+
Please refer to [html2rss' README for a description of _the feed config and its options_](https://github.com/html2rss/html2rss#the-feed-config-and-its-options). html2rss-web is just a small web application that builds on html2rss.
89121

90122
## Versioning and releases
91123

@@ -112,15 +144,23 @@ If you're going to host a public instance, _please, please, please_:
112144

113145
### Supported ENV variables
114146

115-
| Name | Description |
116-
| ------------------------------ | -------------------------------- |
117-
| `PORT` | default: 3000 |
118-
| `RACK_ENV` | default: 'development' |
119-
| `RACK_TIMEOUT_SERVICE_TIMEOUT` | default: 15 |
120-
| `WEB_CONCURRENCY` | default: 2 |
121-
| `WEB_MAX_THREADS` | default: 5 |
122-
| `HEALTH_CHECK_USERNAME` | default: auto-generated on start |
123-
| `HEALTH_CHECK_PASSWORD` | default: auto-generated on start |
147+
| Name | Description |
148+
| ------------------------------ | ---------------------------------- |
149+
| `BASE_URL` | default: '<http://localhost:3000>' |
150+
| `LOG_LEVEL` | default: 'warn' |
151+
| `HEALTH_CHECK_USERNAME` | default: auto-generated on start |
152+
| `HEALTH_CHECK_PASSWORD` | default: auto-generated on start |
153+
| | |
154+
| `AUTO_SOURCE_ENABLED` | default: false |
155+
| `AUTO_SOURCE_USERNAME` | no default |
156+
| `AUTO_SOURCE_PASSWORD` | no default |
157+
| `AUTO_SOURCE_ALLOWED_ORIGINS` | no default |
158+
| | |
159+
| `PORT` | default: 3000 |
160+
| `RACK_ENV` | default: 'development' |
161+
| `RACK_TIMEOUT_SERVICE_TIMEOUT` | default: 15 |
162+
| `WEB_CONCURRENCY` | default: 2 |
163+
| `WEB_MAX_THREADS` | default: 5 |
124164

125165
### Runtime monitoring via `GET /health_check.txt`
126166

app.rb

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
require 'roda'
44
require 'rack/cache'
5-
65
require_relative 'roda/roda_plugins/basic_auth'
76

87
module Html2rss
@@ -12,12 +11,9 @@ module Web
1211
#
1312
# It is built with [Roda](https://roda.jeremyevans.net/).
1413
class App < Roda
15-
# TODO: move to helper
16-
def self.development?
17-
ENV['RACK_ENV'] == 'development'
18-
end
14+
CONTENT_TYPE_RSS = 'application/xml'
1915

20-
def development? = self.class.development?
16+
# @return [Boolean] true when the RACK_ENV environment variable equals 'development'
def self.development? = ENV.fetch('RACK_ENV', nil) == 'development'
2117

2218
opts[:check_dynamic_arity] = false
2319
opts[:check_arity] = :warn
@@ -33,16 +29,16 @@ def development? = self.class.development?
3329
csp.script_src :self
3430
csp.connect_src :self
3531
csp.img_src :self
36-
csp.font_src :self
32+
csp.font_src :self, 'data:'
3733
csp.form_action :self
3834
csp.base_uri :none
39-
csp.frame_ancestors :none
35+
csp.frame_ancestors :self
36+
csp.frame_src :self
4037
csp.block_all_mixed_content
4138
end
4239

4340
plugin :default_headers,
4441
'Content-Type' => 'text/html',
45-
'X-Frame-Options' => 'deny',
4642
'X-Content-Type-Options' => 'nosniff',
4743
'X-XSS-Protection' => '1; mode=block'
4844

@@ -53,8 +49,9 @@ def development? = self.class.development?
5349
handle_error(error)
5450
end
5551

56-
plugin :hash_branches
52+
plugin :hash_branch_view_subdir
5753
plugin :public
54+
plugin :content_for
5855
plugin :render, escape: true, layout: 'layout'
5956
plugin :typecast_params
6057
plugin :basic_auth
@@ -69,7 +66,6 @@ def development? = self.class.development?
6966

7067
route do |r|
7168
r.public
72-
7369
r.hash_branches('')
7470

7571
r.root { view 'index' }

config.ru

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ require 'rubygems'
44
require 'bundler/setup'
55
require 'rack-timeout'
66

7-
use Rack::Timeout
8-
97
dev = ENV.fetch('RACK_ENV', nil) == 'development'
108

119
requires = Dir['app/**/*.rb']
@@ -26,6 +24,8 @@ if dev
2624

2725
run Unreloader
2826
else
27+
use Rack::Timeout
28+
2929
require_relative 'app'
3030
requires.each { |f| require_relative f }
3131

helpers/auto_source.rb

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# frozen_string_literal: true
2+
3+
require 'addressable'
4+
require 'base64'
5+
require 'html2rss'
6+
require 'ssrf_filter'
7+
8+
module Html2rss
9+
module Web
10+
##
# Helper methods for handling the auto source feature: reading its
# configuration from ENV, guarding requests by host allow-list, and building
# a feed from a Base64 encoded URL.
class AutoSource
  # @return [Boolean] true only when AUTO_SOURCE_ENABLED is exactly 'true'
  def self.enabled? = ENV['AUTO_SOURCE_ENABLED'].to_s == 'true'

  # @return [String]
  # @raise [KeyError] when AUTO_SOURCE_USERNAME is not set
  def self.username = ENV.fetch('AUTO_SOURCE_USERNAME')

  # @return [String]
  # @raise [KeyError] when AUTO_SOURCE_PASSWORD is not set
  def self.password = ENV.fetch('AUTO_SOURCE_PASSWORD')

  # Reads the comma separated AUTO_SOURCE_ALLOWED_ORIGINS variable.
  # Surrounding whitespace and empty entries are dropped.
  #
  # @return [Set<String>] empty when the variable is unset or blank
  def self.allowed_origins
    split_host_list(ENV.fetch('AUTO_SOURCE_ALLOWED_ORIGINS', '')).to_set
  end

  # Decodes the URL, fetches it via SsrfFilter and builds the feed from the
  # fetched body and headers.
  #
  # @param encoded_url [String] Base64 encoded URL
  # @return [RSS::Rss]
  def self.build_auto_source_from_encoded_url(encoded_url)
    url = Addressable::URI.parse Base64.urlsafe_decode64(encoded_url)
    response = SsrfFilter.get(url)
    # to_hash maps each header name to a list of values; only the first is kept.
    headers = response.to_hash.transform_values(&:first)

    auto_source = Html2rss::AutoSource.new(url, body: response.body, headers:)

    auto_source.channel.stylesheets << Html2rss::RssBuilder::Stylesheet.new(href: '/rss.xsl', type: 'text/xsl')

    auto_source.build
  end

  # @param rss [RSS::Rss]
  # @param default_in_minutes [Integer] used when rss, its channel or its ttl is nil
  # @return [Integer]
  def self.ttl_in_seconds(rss, default_in_minutes: 60)
    (rss&.channel&.ttl || default_in_minutes) * 60
  end

  # Returns without touching the response when one of the request's hosts is
  # allowed. Otherwise writes an explanation, sets status 403 and halts.
  #
  # The Host and X-Forwarded-Host header values are both considered. Each may
  # carry a comma separated list (e.g. when several proxies are chained), so
  # every listed host is checked individually.
  #
  # @param request [Roda::RodaRequest]
  # @param response [Roda::RodaResponse]
  # @param allowed_origins [Set<String>]
  def self.check_request_origin!(request, response, allowed_origins = AutoSource.allowed_origins)
    if allowed_origins.empty?
      response.write 'No allowed origins are configured. Please set AUTO_SOURCE_ALLOWED_ORIGINS.'
    else
      return if allowed_origins.intersect?(request_hosts(request))

      response.write 'Origin is not allowed.'
    end

    response.status = 403
    request.halt
  end

  # Collects every host named in the request's Host and X-Forwarded-Host headers.
  #
  # @param request [#env]
  # @return [Set<String>]
  def self.request_hosts(request)
    %w[HTTP_HOST HTTP_X_FORWARDED_HOST]
      .flat_map { |key| split_host_list(request.env[key]) }
      .to_set
  end

  # Splits a comma separated list, strips each entry and drops empty ones.
  #
  # @param value [String, nil]
  # @return [Array<String>]
  def self.split_host_list(value)
    value.to_s.split(',').map(&:strip).reject(&:empty?)
  end

  private_class_method :request_hosts, :split_host_list
end
61+
end
62+
end
Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# frozen_string_literal: true
22

3+
require 'html2rss/configs'
4+
require_relative '../app/local_config'
5+
36
module Html2rss
47
module Web
58
class App
@@ -15,15 +18,21 @@ def handle_error(error) # rubocop:disable Metrics/MethodLength
1518
when LocalConfig::NotFound,
1619
Html2rss::Configs::ConfigNotFound
1720
set_error_response('Feed config not found', 404)
21+
when Html2rss::Error
22+
set_error_response('Html2rss error', 422)
1823
else
1924
set_error_response('Internal Server Error', 500)
2025
end
2126

22-
@show_backtrace = ENV.fetch('RACK_ENV', nil) == 'development'
27+
@show_backtrace = self.class.development?
2328
@error = error
29+
30+
set_view_subdir nil
2431
view 'error'
2532
end
2633

34+
private
35+
2736
def set_error_response(page_title, status)
2837
@page_title = page_title
2938
response.status = status

helpers/handle_html2rss_configs.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def handle_html2rss_configs(request, _folder_name, _config_name_with_ext)
77
path = RequestPath.new(request)
88

99
Html2rssFacade.from_config(path.full_config_name, typecast_params) do |config|
10-
response['Content-Type'] = 'text/xml'
10+
response['Content-Type'] = CONTENT_TYPE_RSS
1111
HttpCache.expires(response, config.ttl * 60, cache_control: 'public')
1212
end
1313
end

0 commit comments

Comments
 (0)