diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 5a190a05d..4c4badbb9 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -160,6 +160,7 @@ RSpec/LeakyLocalVariable: RSpec/MessageExpectation: Exclude: - 'spec/commands/feed/fetch_one_spec.rb' + - 'spec/integration/feed_importing_spec.rb' - 'spec/models/migration_status_spec.rb' - 'spec/repositories/story_repository_spec.rb' - 'spec/tasks/remove_old_stories_spec.rb' @@ -180,6 +181,7 @@ RSpec/MultipleExpectations: - 'spec/utils/feed_discovery_spec.rb' - 'spec/utils/i18n_support_spec.rb' - 'spec/utils/opml_parser_spec.rb' + - 'spec/utils/safe_fetch_spec.rb' # Offense count: 5 # Configuration parameters: EnforcedStyle, IgnoreSharedExamples. diff --git a/Gemfile b/Gemfile index 037eccf36..dcc9f8573 100644 --- a/Gemfile +++ b/Gemfile @@ -21,6 +21,7 @@ gem "httparty" gem "jsbundling-rails" gem "nokogiri" gem "pg" +gem "private_address_check", require: "private_address_check/tcpsocket_ext" gem "propshaft" gem "puma" gem "rack-ssl" diff --git a/Gemfile.lock b/Gemfile.lock index 18975a2b3..93d1b5757 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -241,6 +241,7 @@ GEM prettyprint prettyprint (0.2.0) prism (1.9.0) + private_address_check (0.8.0) propshaft (1.3.2) actionpack (>= 7.0.0) activesupport (>= 7.0.0) @@ -481,6 +482,7 @@ DEPENDENCIES jsbundling-rails nokogiri pg + private_address_check propshaft pry-byebug puma diff --git a/app/commands/feed/fetch_one.rb b/app/commands/feed/fetch_one.rb index 6d71b5f49..e1c5e39da 100644 --- a/app/commands/feed/fetch_one.rb +++ b/app/commands/feed/fetch_one.rb @@ -21,7 +21,7 @@ def call(feed) private def fetch_raw_feed(feed) - response = HTTParty.get(feed.url).to_s + response = SafeFetch.body(feed.url) Feedjira.parse(response) end diff --git a/app/utils/feed_discovery.rb b/app/utils/feed_discovery.rb index 2664a09ee..e58740786 100644 --- a/app/utils/feed_discovery.rb +++ b/app/utils/feed_discovery.rb @@ -4,7 +4,7 @@ module FeedDiscovery class << self def call(url) get_feed_for_url(url) do - urls = Feedbag.find(url) + urls = discover_feeds(url) return false if urls.empty? get_feed_for_url(urls.first) { return false } @@ -13,8 +13,14 @@ def call(url) private + def discover_feeds(url) + SafeFetch.guard(url) { Feedbag.find(url) } + rescue SafeFetch::UnsafeUrl + [] + end + def get_feed_for_url(url) - response = HTTParty.get(url).to_s + response = SafeFetch.body(url) feed = Feedjira.parse(response) feed.feed_url ||= url feed diff --git a/app/utils/safe_fetch.rb b/app/utils/safe_fetch.rb new file mode 100644 index 000000000..0ea94a787 --- /dev/null +++ b/app/utils/safe_fetch.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +# Guards outbound feed requests against SSRF (CWE-918). Refuses non-http(s) +# schemes and connections to private/reserved IPs, re-checking the connected +# address on every redirect hop via PrivateAddressCheck's socket patch. +module SafeFetch + module_function + + ALLOWED_SCHEMES = ["http", "https"].freeze + + class UnsafeUrl < StandardError + end + + # Fetch a URL body under SSRF protection. + def body(url) + guard(url) { HTTParty.get(url).to_s } + end + + # Run a block (e.g. Feedbag discovery) under the same protections. + def guard(url, &) + raise(UnsafeUrl, url) if ALLOWED_SCHEMES.exclude?(scheme(url)) + + PrivateAddressCheck.only_public_connections(&) + end + + def scheme(url) + URI.parse(url).scheme + rescue URI::InvalidURIError + nil + end +end diff --git a/spec/integration/feed_importing_spec.rb b/spec/integration/feed_importing_spec.rb index 4755f8af3..7c12a0df8 100644 --- a/spec/integration/feed_importing_spec.rb +++ b/spec/integration/feed_importing_spec.rb @@ -18,6 +18,13 @@ def create_feed(**) ) end + # The local FeedServer binds to loopback, which the SSRF guard blocks. + # Treat addresses as public so these import specs can reach it. + def allow_local_fetches + allow(PrivateAddressCheck) + .to receive(:resolves_to_private_address?).and_return(false) + end + describe "Valid feed" do # articles older than 3 days are ignored, so freeze time within # applicable range of the stories in the sample feed @@ -25,6 +32,7 @@ def create_feed(**) describe "Importing for the first time" do it "imports all entries" do + allow_local_fetches travel_to(Time.parse("2014-08-15T17:30:00Z")) feed = create_feed(url: create_server.url) expect { Feed::FetchOne.call(feed) } @@ -35,6 +43,7 @@ def create_feed(**) describe "Importing for the second time" do context "no new entries" do it "does not create new stories" do + allow_local_fetches travel_to(Time.parse("2014-08-15T17:30:00Z")) feed = create_feed(url: create_server.url) Feed::FetchOne.call(feed) @@ -45,6 +54,7 @@ def create_feed(**) context "new entries" do it "creates new stories" do + allow_local_fetches travel_to(Time.parse("2014-08-15T17:30:00Z")) server = create_server feed = create_feed(url: server.url) @@ -72,6 +82,7 @@ def create_feed(**) # last time this feed was fetched is after 00:00 the day the article # was published. + allow_local_fetches travel_to(Time.parse("2014-08-12T17:30:00Z")) server = create_server(url:) feed = create_feed(url: server.url, last_fetched:) diff --git a/spec/utils/feed_discovery_spec.rb b/spec/utils/feed_discovery_spec.rb index 4f5ddcd4e..247534cc3 100644 --- a/spec/utils/feed_discovery_spec.rb +++ b/spec/utils/feed_discovery_spec.rb @@ -39,6 +39,15 @@ expect(result).to be(false) end + it "returns false without fetching when the url scheme is not http(s)" do + expect(HTTParty).not_to receive(:get) + expect(Feedbag).not_to receive(:find) + + result = described_class.call("file:///etc/passwd") + + expect(result).to be(false) + end + it "returns the feed if the discovered feed is parsable" do feed = double(feed_url: url) expect(HTTParty).to receive(:get).with(url) diff --git a/spec/utils/safe_fetch_spec.rb b/spec/utils/safe_fetch_spec.rb new file mode 100644 index 000000000..b1bd14085 --- /dev/null +++ b/spec/utils/safe_fetch_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +RSpec.describe SafeFetch do + describe ".guard" do + it "yields the block result for http urls" do + expect(described_class.guard("http://example.com") { :ok }).to eq(:ok) + end + + it "yields the block result for https urls" do + expect(described_class.guard("https://example.com") { :ok }).to eq(:ok) + end + + it "raises UnsafeUrl for the file scheme" do + expect { described_class.guard("file:///etc/passwd") { :ok } } + .to raise_error(SafeFetch::UnsafeUrl) + end + + it "raises UnsafeUrl for a scheme-less url" do + expect { described_class.guard("example.com") { :ok } } + .to raise_error(SafeFetch::UnsafeUrl) + end + + it "raises UnsafeUrl for a malformed url" do + expect { described_class.guard("http://[bad") { :ok } } + .to raise_error(SafeFetch::UnsafeUrl) + end + end + + describe ".body" do + it "fetches the url body through the guard" do + response = instance_double(HTTParty::Response, to_s: "") + expect(HTTParty) + .to receive(:get).with("http://example.com").and_return(response) + + expect(described_class.body("http://example.com")).to eq("") + end + + it "raises UnsafeUrl without fetching for non-http(s) schemes" do + expect { described_class.body("file:///etc/passwd") } + .to raise_error(SafeFetch::UnsafeUrl) + end + end +end