Skip to content

Commit 339a0cf

Browse files
committed
fix: Fetch feed content with httpx instead of letting feedparser download it, since feedparser doesn't let us set a timeout. This also means the same User-Agent header is sent both when validating and when fetching the feed.
1 parent 43300eb commit 339a0cf

File tree

6 files changed

+34
-11
lines changed

6 files changed

+34
-11
lines changed

example/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "django-rss-filter"
3-
version = "0.6.0"
3+
version = "0.7.0"
44
description = "Filter public RSS feeds, remove articles that contain certain keywords or categories."
55
authors = [
66
{name = "Kevin Renskers", email = "[email protected]"},

rssfilter/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# HTTP "User-Agent" header to send to servers when downloading feeds.
2+
# If you are embedding django-rss-filter in a larger application, you should
3+
# change this to your application name and URL.
4+
#
5+
# import rssfilter
6+
# rssfilter.USER_AGENT = "MyApp/1.0 +http://example.com/"
7+
USER_AGENT = "django-rss-filter/1.0 +https://github.com/loopwerk/django-rss-filter"

rssfilter/models.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from django.urls import reverse
88
from django.utils import timezone
99

10+
from . import USER_AGENT
1011
from .settings import RSS_FILTER_CACHE_SECONDS
1112
from .utils import filter_feed, validate_feed
1213

@@ -29,8 +30,9 @@ def clean(self):
2930
# Make sure we have a valid feed.
3031
# Let's assume it's valid if it's already in FeedCache.
3132
if not FeedCache.objects.filter(feed_url=self.feed_url).exists():
32-
if not validate_feed(self.feed_url):
33-
raise ValidationError({"feed_url": "This doesn't seem to be a valid RSS or Atom feed"})
33+
valid, message = validate_feed(self.feed_url)
34+
if not valid:
35+
raise ValidationError({"feed_url": message})
3436

3537
def get_filtered_feed_body(self) -> str:
3638
five_mins_ago = timezone.now() - timedelta(seconds=RSS_FILTER_CACHE_SECONDS)
@@ -59,7 +61,7 @@ def get_feed_body(self) -> str:
5961
if self.cache_date and self.cache_date > five_mins_ago:
6062
return self.feed_body
6163

62-
r = httpx.get(self.feed_url, follow_redirects=True)
64+
r = httpx.get(self.feed_url, follow_redirects=True, timeout=2, headers={"User-Agent": USER_AGENT})
6365
self.feed_body = r.text
6466
self.cache_date = timezone.now()
6567
self.save()

rssfilter/utils.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,30 @@
11
import feedparser
2+
import httpx
23
from feedgen.feed import FeedGenerator
4+
from httpx import ConnectError, ConnectTimeout
35

6+
from . import USER_AGENT
47

5-
def validate_feed(feed_url: str) -> bool:
8+
9+
def validate_feed(feed_url: str) -> tuple[bool, str]:
610
try:
7-
feed = feedparser.parse(feed_url)
11+
# Fetch content using httpx rather than having feedparser do this,
12+
# since we can't set a timeout with feedparser. It also makes sure
13+
# that validating and then fetching the feed is done in a consistent
14+
# manner.
15+
r = httpx.get(feed_url, follow_redirects=True, timeout=2, headers={"User-Agent": USER_AGENT})
16+
17+
feed = feedparser.parse(r.text)
818
version = feed.get("version", "")
919
if not version:
10-
return False
11-
return True
20+
return False, "This doesn't seem to be a valid RSS or Atom feed"
21+
return True, ""
1222
except ValueError:
13-
return False
23+
return False, "This doesn't seem to be a valid RSS or Atom feed"
24+
except ConnectTimeout:
25+
return False, "Couldn't load the URL due to a connection timeout"
26+
except ConnectError:
27+
return False, "Couldn't load the URL due to a connection error"
1428

1529

1630
def filter_feed(feed_body: str, filtered_words: str, filtered_categories: str) -> str:

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)