Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Python-Chatbot/stackoverflow.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
222 lines (175 sloc)
8.59 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
""" | |
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- bs4 (https://www.crummy.com/software/BeautifulSoup/) by Leonard Richardson for parsing HTML pages
""" | |
""" | |
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
""" | |
# Google search restricted to stackoverflow.com; {query} is the URL-encoded user query.
_SEARCH_URL = 'https://www.google.com/search?q={query}+site%3Astackoverflow.com'

# preventing google from blocking bots
# Based on answer provided by alecxe
# (https://stackoverflow.com/questions/27652543/how-to-use-python-requests-to-fake-a-browser-visit)
_HEADERS = {
    'User-Agent': 'Mozilla/5.0'
    # if above is not working, try one of these:
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0'
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
}

# Seconds to wait for an HTTP response; without a timeout requests can hang forever.
_REQUEST_TIMEOUT = 10
# Discord's message limit is 2000 characters; longer posts are sent in 1000-character pieces.
_DISCORD_LIMIT = 2000
_CHUNK_SIZE = 1000

_NOT_FOUND = 'Sorry, no questions found :( Try again.'
_NEXT_HINT = "Type '!f na' for next answer or '!f nq' for next relevant question"
_NO_MORE_ANSWERS = "No more answers. Type '!f nq' for next question."


def _build_search_url(query):
    """Return the Google search URL for *query*, restricted to stackoverflow.com."""
    terms = ' '.join(query.lower().split())
    # quote_plus turns spaces into '+' and escapes characters such as '&' or '#'
    # that would otherwise corrupt the query string.
    return _SEARCH_URL.format(query=quote_plus(terms))


def _fetch_soup(url):
    """Download *url* (blocking) and return it parsed with BeautifulSoup.

    Raises requests.RequestException on network failure or timeout.
    """
    page = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    return BeautifulSoup(page.text, 'html.parser')


def _fetch_posts(url):
    """Return the text of every 'post-text' element on the page at *url*.

    Index 0 is treated by the caller as the question and the following
    entries as answers. Returns an empty list when the page cannot be fetched.
    """
    try:
        soup = _fetch_soup(url)
    except requests.RequestException:
        return []
    return [post.text for post in soup.find_all(class_='post-text')]


def _extract_question_links(soup):
    """Collect the question links found in a parsed search-result page."""
    # Titles of images (other than 'meta' ones) mark links that must be dropped.
    # .get() is used because not every <img> carries a title attribute.
    unwanted = []
    for image in soup.find_all('img'):
        title = image.get('title')
        if title and 'meta' not in title:
            unwanted.append(title)

    links = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not href:
            continue
        # Drop the first 7 characters of the href
        # (presumably Google's '/url?q=' redirect prefix -- TODO confirm).
        target = href[7:]
        # filtering links not targeted
        if 'questions' not in target:
            continue
        if 'webcache' in target or 'meta' in target:
            continue
        if any(title in target for title in unwanted):
            continue
        links.append(target)
    return links


def _chunks(text, size):
    """Split *text* into consecutive pieces of at most *size* characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


async def _send_post(client, channel, header, text, split_header=None):
    """Send *header* followed by *text*, splitting when the message is too long.

    When header and text fit into one Discord message they are sent together.
    Otherwise *split_header* (defaults to *header*) is sent on its own,
    followed by the text in pieces of _CHUNK_SIZE characters.
    """
    if len(header) + len(text) <= _DISCORD_LIMIT:
        await client.send_message(channel, header + text)
        return
    await client.send_message(channel, header if split_header is None else split_header)
    for piece in _chunks(text, _CHUNK_SIZE):
        # A whitespace-only piece would be an empty message, so it is skipped.
        if piece.strip():
            await client.send_message(channel, piece)


async def _show_question(client, channel, link, number, posts):
    """Post the link of a result and the question text (posts[0])."""
    await client.send_message(channel, 'URL ' + '<' + link + '>')
    if not posts:
        await client.send_message(channel, _NOT_FOUND)
        return
    header = '.\n\n__**Question**__ ' + '**' + str(number) + '**'
    await _send_post(client, channel, header, posts[0])


async def _show_answer(client, channel, posts, answer, lead=''):
    """Post answer number *answer* (posts[answer]) or report that none is left.

    *lead* is extra text put in front of the header when the answer fits into
    a single message.
    """
    if answer >= len(posts):
        await client.send_message(channel, _NO_MORE_ANSWERS)
        return
    header = '__**Answer**__ ' + '**' + str(answer) + '**'
    await _send_post(client, channel, lead + header, posts[answer], split_header=header)
    await client.send_message(channel, _NEXT_HINT)


async def stackoverflow(client, message):
    """Answer a '!flow <query>' command with questions and answers from Stack Overflow.

    The query is searched on Google (restricted to stackoverflow.com), the
    first result's question and first answer are posted, and the author can
    then browse with '!f na' (next answer), '!f nq' (next question) or
    '!f stop'.

    client  -- object providing the send_message / wait_for_message coroutines
    message -- the triggering message; its content, channel and author are read
    """
    loop = asyncio.get_event_loop()

    # message.content at this point is: !flow user_query_here
    url = _build_search_url(message.content[6:])

    # requests is blocking, so page downloads run in the default executor
    # instead of stalling the event loop.
    try:
        soup = await loop.run_in_executor(None, _fetch_soup, url)
    except requests.RequestException:
        await client.send_message(message.channel, _NOT_FOUND)
        return

    clean_links = _extract_question_links(soup)
    if not clean_links:
        await client.send_message(message.channel, _NOT_FOUND)
        return

    # print first question and first answer
    page = 0
    answer = 1
    posts = await loop.run_in_executor(None, _fetch_posts, clean_links[page])
    await _show_question(client, message.channel, clean_links[page], page + 1, posts)
    await _show_answer(client, message.channel, posts, answer, lead='.\n\n\n\n')

    # this loop allows user to scroll through all answers on stackoverflow page,
    # and gives an option to move to next page
    while page < len(clean_links) - 1:
        # waits for user input
        message = await client.wait_for_message(author=message.author)
        choice = message.content
        if '!f' not in choice:
            await client.send_message(message.channel, 'Wrong command.')
        elif 'stop' in choice:
            break
        # if users want to see next answer
        elif 'na' in choice:
            answer += 1
            await _show_answer(client, message.channel, posts, answer)
        # if users want to see next question (google result)
        elif 'nq' in choice:
            answer = 1
            page += 1
            posts = await loop.run_in_executor(None, _fetch_posts, clean_links[page])
            await _show_question(client, message.channel, clean_links[page], page + 1, posts)
            await _show_answer(client, message.channel, posts, answer)

    await client.send_message(message.channel, 'No more questions :( ')