Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
5003CEM/ADV_3.py
Go to file
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
44 lines (38 sloc)
2.83 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import concurrent.futures | |
import newspaper | |
# News sites whose sources are built (one get_headlines call per entry) by
# concurrent_headlines().
# NOTE(review): Der Spiegel is normally served from spiegel.de - confirm that
# the 'derspiegel.de' host below actually resolves to the intended site.
URLs = [
    'http://www.foxnews.com/',
    'http://www.cnn.com/',
    'http://www.derspiegel.de/',
    'http://www.bbc.co.uk/',
    'https://theguardian.com',
]
def get_headlines(url):
    """Build a newspaper source from ``url``.

    Despite the name, this does not return headlines: it returns the whole
    built source object, and callers read its ``articles`` attribute.

    Args:
        url: Address of the news site to build.

    Returns:
        Whatever ``newspaper.build`` returns for ``url`` (categories, feeds,
        articles, etc. extracted from the site).
    """
    # memoize_articles=False: presumably so that a repeated build of the same
    # site is not filtered down to only unseen articles - see the newspaper
    # documentation for the caching behaviour.
    return newspaper.build(url, memoize_articles=False)
def concurrent_headlines():
    """Print the first five headlines of every site listed in ``URLs``.

    Each site's source is built on a worker thread via ``get_headlines``
    (at most five threads). Sites are reported in the order their builds
    finish, so the output order may differ between runs. The articles
    themselves are downloaded and parsed one at a time in the calling thread.

    Errors are printed and skipped rather than raised: a failed build is
    reported once for its site, and a failed article download/parse is
    reported once for that article.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # Remember which URL each future belongs to so results can be labelled.
        future_to_url = {executor.submit(get_headlines, url): url for url in URLs}

        for future in concurrent.futures.as_completed(future_to_url):
            print('\n''The headlines from %s are' % future_to_url[future], '\n')

            # Fetch the result once; a build failure concerns the whole site,
            # so report it a single time instead of once per article slot.
            try:
                source = future.result()
            except Exception as exc:
                print('Exception: %s' % (exc))
                continue

            # Start at index 0 so the very first article is included, and
            # slice so that sources with fewer than five articles are fine.
            for art in source.articles[:5]:
                try:
                    art.download()
                    art.parse()
                except Exception as exc:
                    print('Exception: %s' % (exc))
                else:
                    print(art.title)
if __name__ == '__main__':
    import timeit

    # Run the whole fetch twice and report the mean time per run in seconds.
    # Passing the function object avoids the string statement and the
    # "from __main__ import ..." setup boilerplate.
    runs = 2
    elapsed_time = timeit.timeit(concurrent_headlines, number=runs) / runs
    print(elapsed_time)