Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ RSpec/LeakyLocalVariable:
RSpec/MessageExpectation:
Exclude:
- 'spec/commands/feed/fetch_one_spec.rb'
- 'spec/integration/feed_importing_spec.rb'
- 'spec/models/migration_status_spec.rb'
- 'spec/repositories/story_repository_spec.rb'
- 'spec/tasks/remove_old_stories_spec.rb'
Expand All @@ -180,6 +181,7 @@ RSpec/MultipleExpectations:
- 'spec/utils/feed_discovery_spec.rb'
- 'spec/utils/i18n_support_spec.rb'
- 'spec/utils/opml_parser_spec.rb'
- 'spec/utils/safe_fetch_spec.rb'

# Offense count: 5
# Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
Expand Down
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ gem "httparty"
gem "jsbundling-rails"
gem "nokogiri"
gem "pg"
gem "private_address_check", require: "private_address_check/tcpsocket_ext"
gem "propshaft"
gem "puma"
gem "rack-ssl"
Expand Down
2 changes: 2 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ GEM
prettyprint
prettyprint (0.2.0)
prism (1.9.0)
private_address_check (0.8.0)
propshaft (1.3.2)
actionpack (>= 7.0.0)
activesupport (>= 7.0.0)
Expand Down Expand Up @@ -481,6 +482,7 @@ DEPENDENCIES
jsbundling-rails
nokogiri
pg
private_address_check
propshaft
pry-byebug
puma
Expand Down
2 changes: 1 addition & 1 deletion app/commands/feed/fetch_one.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def call(feed)
private

def fetch_raw_feed(feed)
response = HTTParty.get(feed.url).to_s
response = SafeFetch.body(feed.url)
Feedjira.parse(response)
end

Expand Down
10 changes: 8 additions & 2 deletions app/utils/feed_discovery.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module FeedDiscovery
class << self
def call(url)
get_feed_for_url(url) do
urls = Feedbag.find(url)
urls = discover_feeds(url)
return false if urls.empty?

get_feed_for_url(urls.first) { return false }
Expand All @@ -13,8 +13,14 @@ def call(url)

private

def discover_feeds(url)
SafeFetch.guard(url) { Feedbag.find(url) }
rescue SafeFetch::UnsafeUrl
[]
end

def get_feed_for_url(url)
response = HTTParty.get(url).to_s
response = SafeFetch.body(url)
feed = Feedjira.parse(response)
feed.feed_url ||= url
feed
Expand Down
31 changes: 31 additions & 0 deletions app/utils/safe_fetch.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# frozen_string_literal: true

# Guards outbound feed requests against SSRF (CWE-918). Refuses non-http(s)
# schemes and connections to private/reserved IPs, re-checking the connected
# address on every redirect hop via PrivateAddressCheck's socket patch.
module SafeFetch
module_function

ALLOWED_SCHEMES = ["http", "https"].freeze

class UnsafeUrl < StandardError
end

# Fetch a URL body under SSRF protection.
def body(url)
guard(url) { HTTParty.get(url).to_s }
end

# Run a block (e.g. Feedbag discovery) under the same protections.
def guard(url, &)
raise(UnsafeUrl, url) if ALLOWED_SCHEMES.exclude?(scheme(url))

PrivateAddressCheck.only_public_connections(&)
end

def scheme(url)
URI.parse(url).scheme
rescue URI::InvalidURIError
nil
end
end
11 changes: 11 additions & 0 deletions spec/integration/feed_importing_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,21 @@ def create_feed(**)
)
end

# The local FeedServer binds to loopback, which the SSRF guard blocks.
# Treat addresses as public so these import specs can reach it.
def allow_local_fetches
allow(PrivateAddressCheck)
.to receive(:resolves_to_private_address?).and_return(false)
end

describe "Valid feed" do
# articles older than 3 days are ignored, so freeze time within
# applicable range of the stories in the sample feed
# travel_to(Time.parse("2014-08-15T17:30:00Z"))

describe "Importing for the first time" do
it "imports all entries" do
allow_local_fetches
travel_to(Time.parse("2014-08-15T17:30:00Z"))
feed = create_feed(url: create_server.url)
expect { Feed::FetchOne.call(feed) }
Expand All @@ -35,6 +43,7 @@ def create_feed(**)
describe "Importing for the second time" do
context "no new entries" do
it "does not create new stories" do
allow_local_fetches
travel_to(Time.parse("2014-08-15T17:30:00Z"))
feed = create_feed(url: create_server.url)
Feed::FetchOne.call(feed)
Expand All @@ -45,6 +54,7 @@ def create_feed(**)

context "new entries" do
it "creates new stories" do
allow_local_fetches
travel_to(Time.parse("2014-08-15T17:30:00Z"))
server = create_server
feed = create_feed(url: server.url)
Expand Down Expand Up @@ -72,6 +82,7 @@ def create_feed(**)
# last time this feed was fetched is after 00:00 the day the article
# was published.

allow_local_fetches
travel_to(Time.parse("2014-08-12T17:30:00Z"))
server = create_server(url:)
feed = create_feed(url: server.url, last_fetched:)
Expand Down
9 changes: 9 additions & 0 deletions spec/utils/feed_discovery_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@
expect(result).to be(false)
end

it "returns false without fetching when the url scheme is not http(s)" do
expect(HTTParty).not_to receive(:get)
expect(Feedbag).not_to receive(:find)

result = described_class.call("file:///etc/passwd")

expect(result).to be(false)
end

it "returns the feed if the discovered feed is parsable" do
feed = double(feed_url: url)
expect(HTTParty).to receive(:get).with(url)
Expand Down
43 changes: 43 additions & 0 deletions spec/utils/safe_fetch_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# frozen_string_literal: true

RSpec.describe SafeFetch do
describe ".guard" do
it "yields the block result for http urls" do
expect(described_class.guard("http://example.com") { :ok }).to eq(:ok)
end

it "yields the block result for https urls" do
expect(described_class.guard("https://example.com") { :ok }).to eq(:ok)
end

it "raises UnsafeUrl for the file scheme" do
expect { described_class.guard("file:///etc/passwd") { :ok } }
.to raise_error(SafeFetch::UnsafeUrl)
end

it "raises UnsafeUrl for a scheme-less url" do
expect { described_class.guard("example.com") { :ok } }
.to raise_error(SafeFetch::UnsafeUrl)
end

it "raises UnsafeUrl for a malformed url" do
expect { described_class.guard("http://[bad") { :ok } }
.to raise_error(SafeFetch::UnsafeUrl)
end
end

describe ".body" do
it "fetches the url body through the guard" do
response = instance_double(HTTParty::Response, to_s: "<rss/>")
expect(HTTParty)
.to receive(:get).with("http://example.com").and_return(response)

expect(described_class.body("http://example.com")).to eq("<rss/>")
end

it "raises UnsafeUrl without fetching for non-http(s) schemes" do
expect { described_class.body("file:///etc/passwd") }
.to raise_error(SafeFetch::UnsafeUrl)
end
end
end
Loading