# ADV_3.py

import concurrent.futures
import newspaper

# News sites whose headlines are fetched; one source is built per entry.
URLs = [
    "http://www.foxnews.com/",
    "http://www.cnn.com/",
    "http://www.derspiegel.de/",
    "http://www.bbc.co.uk/",
    "https://theguardian.com",
]

def get_headlines(url):
    '''Build a newspaper source for the given url and return it.

    The url is handed to newspaper.build together with
    memoize_articles=False; whatever that call returns is passed
    straight back to the caller.
    '''
    return newspaper.build(url, memoize_articles=False)

def concurrent_headlines():
    '''Print the first five headlines of every site listed in URLs.

    One source per URL is built with get_headlines on a pool of at most
    five worker threads (concurrent.futures).  Each site's headlines are
    printed as soon as its source has finished building.  Errors are
    reported on stdout and do not stop the remaining sites or articles.
    Returns None.
    '''
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:      # use at most 5 threads
        # Map every submitted build back to the URL it was started for.
        future_to_url = {executor.submit(get_headlines, url): url for url in URLs}

        for future in concurrent.futures.as_completed(future_to_url):           # handle builds as they finish
            print('\nThe headlines from %s are' % future_to_url[future], '\n')  # print the message with url

            # Fetch the result once per site so a failed build is reported
            # a single time rather than once per article.
            try:
                source = future.result()
            except Exception as exc:                                            # the build itself failed
                print('Exception: %s' % (exc))
                continue

            # Slicing starts at index 0, so the very first article is included,
            # and it tolerates sources that yielded fewer than five articles.
            for art in source.articles[:5]:
                try:
                    art.download()                                              # download the article
                    art.parse()                                                 # parse the article
                except Exception as exc:                                        # if there is an error
                    print('Exception: %s' % (exc))                              # print info message about the error
                else:
                    print(art.title)                                            # print the title of the article

if __name__ == '__main__':
    # Script entry point: time concurrent_headlines() and print the
    # average duration of one run in seconds.
    import timeit

    runs = 2    # number of timed repetitions
    # timeit returns the total time for all repetitions; divide for the mean.
    elapsed_time = timeit.timeit(concurrent_headlines, number=runs) / runs
    print(elapsed_time)     # print the time it takes to execute the program