-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtasks.py
More file actions
53 lines (39 loc) · 1.72 KB
/
tasks.py
File metadata and controls
53 lines (39 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from celery.utils.log import get_task_logger
from celeryapp import app
# User-agent string (identifies as Googlebot) sent with the scraping requests.
USER_AGENT = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
# Celery task logger named after this module.
logger = get_task_logger(__name__)
@app.task
def red(action='loving', color='red', description=''):
    """Log the sentence "<Action> him was <color>[ <description>].".

    ``action`` is title-cased; ``description`` is appended after a single
    space only when it is non-empty.
    """
    if description:
        suffix = ' ' + description
    else:
        suffix = ''
    fields = {
        'action': action.title(),
        'color': color,
        'description': suffix,
    }
    sentence = '{action} him was {color}{description}.'.format(**fields)
    logger.info(sentence)
@app.task
def song_links():
    """Scrape the artist track list and queue one ``song_lyrics`` task per song.

    Fetches the songlyrics.com Taylor Swift index page, finds every anchor in
    the ``table.tracklist`` inside ``div#colone-container`` and schedules
    ``song_lyrics`` with the song URL and link text. Tasks are staggered so
    that at most four start per second (``countdown=i // 4``).

    Raises:
        requests.RequestException: if the index page cannot be fetched or the
            server answers with an HTTP error status.
    """
    index_url = 'http://www.songlyrics.com/taylor-swift-lyrics/'
    # A timeout keeps a stalled connection from blocking the worker forever.
    page = requests.get(
        index_url, headers={'user-agent': USER_AGENT}, timeout=30)
    page.raise_for_status()

    parsed_content = BeautifulSoup(page.content, 'html.parser')
    container = parsed_content.find('div', {'id': 'colone-container'})
    tracklist = None
    if container is not None:
        tracklist = container.find('table', {'class': 'tracklist'})
    if tracklist is None:
        # The page layout changed (or we were served a different page);
        # report it clearly instead of failing with AttributeError on None.
        logger.error('No track list found at %s', index_url)
        return

    for i, link in enumerate(tracklist.find_all('a')):
        href = link.get('href')
        if not href:
            # urljoin() would return the index URL itself for an empty href.
            continue
        # Resolve relative links against the page they were actually found on.
        url = urljoin(index_url, href)
        song_lyrics.apply_async(
            kwargs={'url': url, 'title': link.text}, countdown=i // 4)
# 'results' directory next to this module; lyrics files are written here.
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'results')
@app.task
def song_lyrics(url, title):
    """Download one song page and save its lyrics as ``OUTPUT_DIR/<title>.txt``.

    Args:
        url: Address of the song page to fetch.
        title: Song title; used as the output file name after path
            separators are replaced with underscores.

    Raises:
        requests.RequestException: if the page cannot be fetched or the
            server answers with an HTTP error status.
    """
    # have to mimic google bot here
    # A timeout keeps a stalled connection from blocking the worker forever.
    page = requests.get(url, headers={'user-agent': USER_AGENT}, timeout=30)
    page.raise_for_status()

    parsed_content = BeautifulSoup(page.content, 'html.parser')
    lyrics_node = parsed_content.find('p', {'id': 'songLyricsDiv'})
    if lyrics_node is None:
        # Report a missing lyrics element instead of AttributeError on None.
        logger.error('No lyrics found at %s (%r)', url, title)
        return

    # The title comes from scraped HTML: neutralise path separators so it
    # cannot point outside OUTPUT_DIR or into a non-existent sub-directory.
    safe_title = title.replace('/', '_').replace('\\', '_')
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    filename = os.path.join(OUTPUT_DIR, '{}.txt'.format(safe_title))
    # Explicit UTF-8 so non-ASCII lyrics do not depend on the worker's locale.
    with open(filename, 'w', encoding='utf-8') as lyric_file:
        lyric_file.write(lyrics_node.text)