diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml
index 5a190a05d..4c4badbb9 100644
--- a/.rubocop_todo.yml
+++ b/.rubocop_todo.yml
@@ -160,6 +160,7 @@ RSpec/LeakyLocalVariable:
RSpec/MessageExpectation:
Exclude:
- 'spec/commands/feed/fetch_one_spec.rb'
+ - 'spec/integration/feed_importing_spec.rb'
- 'spec/models/migration_status_spec.rb'
- 'spec/repositories/story_repository_spec.rb'
- 'spec/tasks/remove_old_stories_spec.rb'
@@ -180,6 +181,7 @@ RSpec/MultipleExpectations:
- 'spec/utils/feed_discovery_spec.rb'
- 'spec/utils/i18n_support_spec.rb'
- 'spec/utils/opml_parser_spec.rb'
+ - 'spec/utils/safe_fetch_spec.rb'
# Offense count: 5
# Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
diff --git a/Gemfile b/Gemfile
index 037eccf36..dcc9f8573 100644
--- a/Gemfile
+++ b/Gemfile
@@ -21,6 +21,7 @@ gem "httparty"
gem "jsbundling-rails"
gem "nokogiri"
gem "pg"
+gem "private_address_check", require: "private_address_check/tcpsocket_ext"
gem "propshaft"
gem "puma"
gem "rack-ssl"
diff --git a/Gemfile.lock b/Gemfile.lock
index 18975a2b3..93d1b5757 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -241,6 +241,7 @@ GEM
prettyprint
prettyprint (0.2.0)
prism (1.9.0)
+ private_address_check (0.8.0)
propshaft (1.3.2)
actionpack (>= 7.0.0)
activesupport (>= 7.0.0)
@@ -481,6 +482,7 @@ DEPENDENCIES
jsbundling-rails
nokogiri
pg
+ private_address_check
propshaft
pry-byebug
puma
diff --git a/app/commands/feed/fetch_one.rb b/app/commands/feed/fetch_one.rb
index 6d71b5f49..e1c5e39da 100644
--- a/app/commands/feed/fetch_one.rb
+++ b/app/commands/feed/fetch_one.rb
@@ -21,7 +21,7 @@ def call(feed)
private
def fetch_raw_feed(feed)
- response = HTTParty.get(feed.url).to_s
+ response = SafeFetch.body(feed.url)
Feedjira.parse(response)
end
diff --git a/app/utils/feed_discovery.rb b/app/utils/feed_discovery.rb
index 2664a09ee..e58740786 100644
--- a/app/utils/feed_discovery.rb
+++ b/app/utils/feed_discovery.rb
@@ -4,7 +4,7 @@ module FeedDiscovery
class << self
def call(url)
get_feed_for_url(url) do
- urls = Feedbag.find(url)
+ urls = discover_feeds(url)
return false if urls.empty?
get_feed_for_url(urls.first) { return false }
@@ -13,8 +13,14 @@ def call(url)
private
+ def discover_feeds(url)
+ SafeFetch.guard(url) { Feedbag.find(url) }
+ rescue SafeFetch::UnsafeUrl
+ []
+ end
+
def get_feed_for_url(url)
- response = HTTParty.get(url).to_s
+ response = SafeFetch.body(url)
feed = Feedjira.parse(response)
feed.feed_url ||= url
feed
diff --git a/app/utils/safe_fetch.rb b/app/utils/safe_fetch.rb
new file mode 100644
index 000000000..0ea94a787
--- /dev/null
+++ b/app/utils/safe_fetch.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+# Guards outbound feed requests against SSRF (CWE-918). Refuses non-http(s)
+# schemes and connections to private/reserved IPs, re-checking the connected
+# address on every redirect hop via PrivateAddressCheck's socket patch.
+module SafeFetch
+ module_function
+
+ ALLOWED_SCHEMES = ["http", "https"].freeze
+
+ class UnsafeUrl < StandardError
+ end
+
+ # Fetch a URL body under SSRF protection.
+ def body(url)
+ guard(url) { HTTParty.get(url).to_s }
+ end
+
+ # Run a block (e.g. Feedbag discovery) under the same protections.
+ def guard(url, &)
+ raise(UnsafeUrl, url) if ALLOWED_SCHEMES.exclude?(scheme(url))
+
+ PrivateAddressCheck.only_public_connections(&)
+ end
+
+ def scheme(url)
+ URI.parse(url).scheme
+ rescue URI::InvalidURIError
+ nil
+ end
+end
diff --git a/spec/integration/feed_importing_spec.rb b/spec/integration/feed_importing_spec.rb
index 4755f8af3..7c12a0df8 100644
--- a/spec/integration/feed_importing_spec.rb
+++ b/spec/integration/feed_importing_spec.rb
@@ -18,6 +18,13 @@ def create_feed(**)
)
end
+ # The local FeedServer binds to loopback, which the SSRF guard blocks.
+ # Treat addresses as public so these import specs can reach it.
+ def allow_local_fetches
+ allow(PrivateAddressCheck)
+ .to receive(:resolves_to_private_address?).and_return(false)
+ end
+
describe "Valid feed" do
# articles older than 3 days are ignored, so freeze time within
# applicable range of the stories in the sample feed
@@ -25,6 +32,7 @@ def create_feed(**)
describe "Importing for the first time" do
it "imports all entries" do
+ allow_local_fetches
travel_to(Time.parse("2014-08-15T17:30:00Z"))
feed = create_feed(url: create_server.url)
expect { Feed::FetchOne.call(feed) }
@@ -35,6 +43,7 @@ def create_feed(**)
describe "Importing for the second time" do
context "no new entries" do
it "does not create new stories" do
+ allow_local_fetches
travel_to(Time.parse("2014-08-15T17:30:00Z"))
feed = create_feed(url: create_server.url)
Feed::FetchOne.call(feed)
@@ -45,6 +54,7 @@ def create_feed(**)
context "new entries" do
it "creates new stories" do
+ allow_local_fetches
travel_to(Time.parse("2014-08-15T17:30:00Z"))
server = create_server
feed = create_feed(url: server.url)
@@ -72,6 +82,7 @@ def create_feed(**)
# last time this feed was fetched is after 00:00 the day the article
# was published.
+ allow_local_fetches
travel_to(Time.parse("2014-08-12T17:30:00Z"))
server = create_server(url:)
feed = create_feed(url: server.url, last_fetched:)
diff --git a/spec/utils/feed_discovery_spec.rb b/spec/utils/feed_discovery_spec.rb
index 4f5ddcd4e..247534cc3 100644
--- a/spec/utils/feed_discovery_spec.rb
+++ b/spec/utils/feed_discovery_spec.rb
@@ -39,6 +39,15 @@
expect(result).to be(false)
end
+ it "returns false without fetching when the url scheme is not http(s)" do
+ expect(HTTParty).not_to receive(:get)
+ expect(Feedbag).not_to receive(:find)
+
+ result = described_class.call("file:///etc/passwd")
+
+ expect(result).to be(false)
+ end
+
it "returns the feed if the discovered feed is parsable" do
feed = double(feed_url: url)
expect(HTTParty).to receive(:get).with(url)
diff --git a/spec/utils/safe_fetch_spec.rb b/spec/utils/safe_fetch_spec.rb
new file mode 100644
index 000000000..b1bd14085
--- /dev/null
+++ b/spec/utils/safe_fetch_spec.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+RSpec.describe SafeFetch do
+ describe ".guard" do
+ it "yields the block result for http urls" do
+ expect(described_class.guard("http://example.com") { :ok }).to eq(:ok)
+ end
+
+ it "yields the block result for https urls" do
+ expect(described_class.guard("https://example.com") { :ok }).to eq(:ok)
+ end
+
+ it "raises UnsafeUrl for the file scheme" do
+ expect { described_class.guard("file:///etc/passwd") { :ok } }
+ .to raise_error(SafeFetch::UnsafeUrl)
+ end
+
+ it "raises UnsafeUrl for a scheme-less url" do
+ expect { described_class.guard("example.com") { :ok } }
+ .to raise_error(SafeFetch::UnsafeUrl)
+ end
+
+ it "raises UnsafeUrl for a malformed url" do
+ expect { described_class.guard("http://[bad") { :ok } }
+ .to raise_error(SafeFetch::UnsafeUrl)
+ end
+ end
+
+ describe ".body" do
+ it "fetches the url body through the guard" do
+ response = instance_double(HTTParty::Response, to_s: "")
+ expect(HTTParty)
+ .to receive(:get).with("http://example.com").and_return(response)
+
+ expect(described_class.body("http://example.com")).to eq("")
+ end
+
+ it "raises UnsafeUrl without fetching for non-http(s) schemes" do
+ expect { described_class.body("file:///etc/passwd") }
+ .to raise_error(SafeFetch::UnsafeUrl)
+ end
+ end
+end