Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import concurrent.futures
import newspaper
# Front pages of the news sites whose headlines are fetched.
URLs = [
    'http://www.foxnews.com/',
    'http://www.cnn.com/',
    'http://www.derspiegel.de/',
    'http://www.bbc.co.uk/',
    'https://theguardian.com',
]
def get_headlines(url):
    """Build and return a newspaper source for ``url``.

    Despite the name, no headlines are extracted here: the caller receives
    the built source object (categories, feeds, articles, etc.) and reads
    the articles from it. ``memoize_articles=False`` is passed through to
    ``newspaper.build`` unchanged.
    """
    return newspaper.build(url, memoize_articles=False)
def concurrent_headlines(urls=None, max_headlines=5):
    """Print the first headlines of each news site, building sites concurrently.

    Every URL is handed to ``get_headlines`` on a worker thread. Sites are
    reported in the order their builds complete, not in input order.

    Args:
        urls: Iterable of site URLs to scan. Defaults to the module-level
            ``URLs`` list when omitted.
        max_headlines: Maximum number of headlines printed per site.
            Negative values are treated as zero.

    Returns:
        None. Headlines and error messages are written to stdout.
    """
    if urls is None:
        urls = URLs
    limit = max(max_headlines, 0)  # a negative slice bound would drop from the end

    # Use at most 5 threads; each one builds the source for a single URL.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(get_headlines, url): url for url in urls}

        for future in concurrent.futures.as_completed(future_to_url):
            print('\n''The headlines from %s are' % future_to_url[future], '\n')

            # Fetch the build result once: if the build failed, report the
            # error a single time instead of once per requested headline.
            try:
                source = future.result()
            except Exception as exc:
                print('Exception: %s' % (exc))
                continue

            # Slice from index 0 so the first article is included, and so a
            # site with fewer than ``limit`` articles does not raise IndexError.
            for art in source.articles[:limit]:
                try:
                    art.download()
                    art.parse()
                except Exception as exc:
                    # Best effort: one broken article must not stop the rest.
                    print('Exception: %s' % (exc))
                else:
                    print(art.title)
if __name__ == '__main__':
    import timeit

    # Time the whole scrape twice and report the mean duration in seconds.
    runs = 2
    timer = timeit.Timer(
        "concurrent_headlines()",
        setup="from __main__ import concurrent_headlines",
    )
    elapsed_time = timer.timeit(number=runs) / runs
    print(elapsed_time)