Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import requests
from bs4 import BeautifulSoup
"""
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- bs4 (https://www.crummy.com/software/BeautifulSoup/) by Leonard Richardson for parsing HTML pages
"""
"""
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
"""
# Preventing Google from blocking bots: send a browser-like User-Agent.
# Based on answer provided by alecxe
# (https://stackoverflow.com/questions/27652543/how-to-use-python-requests-to-fake-a-browser-visit)
_HEADERS = {
    'User-Agent': 'Mozilla/5.0'
    # if above is not working, try one of these:
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0'
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
}
_SEARCH_URL = 'https://www.google.com/search'
# Prefix of the redirect links found in Google's result page
# (the original code stripped these 7 characters from every href).
_REDIRECT_PREFIX = '/url?q='
# Seconds to wait for a web server before giving up.
_REQUEST_TIMEOUT = 10
# Discord rejects messages longer than 2000 characters.
_DISCORD_LIMIT = 2000
# Size of the pieces an over-long post is split into.
_CHUNK_SIZE = 1000


def _fetch_soup(url, params=None):
    """Download *url* (with optional query *params*) and return the parsed HTML."""
    page = requests.get(url, params=params, headers=_HEADERS,
                        timeout=_REQUEST_TIMEOUT)
    return BeautifulSoup(page.text, 'html.parser')


def _chunks(text, size):
    """Split *text* into consecutive pieces of at most *size* characters.

    Returns an empty list for an empty string, and never repeats content
    when ``len(text)`` is an exact multiple of *size*.
    """
    return [text[start:start + size] for start in range(0, len(text), size)]


def _extract_question_links(soup):
    """Return the question URLs found in a parsed Google result page.

    A link is kept when it is a Google redirect link whose target contains
    'questions' but neither 'webcache' nor 'meta', and does not contain the
    title of any non-'meta' image on the page.
    """
    # Titles of images on the page; links containing one of them are dropped.
    # Images without a (non-empty) title are ignored instead of raising.
    blocked_titles = []
    for image in soup.find_all('img'):
        title = image.get('title')
        if title and 'meta' not in title:
            blocked_titles.append(title)

    links = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href', '')
        if not href.startswith(_REDIRECT_PREFIX):
            continue
        # Drop Google's tracking parameters that follow the target URL.
        target = href[len(_REDIRECT_PREFIX):].partition('&')[0]
        if 'questions' not in target:
            continue
        if 'webcache' in target or 'meta' in target:
            continue
        if any(title in target for title in blocked_titles):
            continue
        links.append(target)
    return links


async def stackoverflow(client, message):
    """Search Stack Overflow through Google and page through the results in chat.

    The text after the command prefix in ``message.content`` is used as the
    search query. The first matching question and its first answer are
    posted to ``message.channel``; afterwards the function waits for
    follow-up messages from the same author:

    * ``!f na``   -- post the next answer of the current question
    * ``!f nq``   -- move on to the next search result
    * ``!f stop`` -- stop browsing

    Args:
        client: object providing the coroutines ``send_message(channel, text)``
            and ``wait_for_message(author=...)``.
        message: the triggering message; its ``content``, ``channel`` and
            ``author`` attributes are read.

    NOTE: ``requests`` is a blocking library, so the event loop is stalled
    while a page is being downloaded.
    """
    no_results = 'Sorry, no questions found :( Try again.'
    no_more_answers = "No more answers. Type '!f nq' for next question."
    hint = "Type '!f na' for next answer or '!f nq' for next relevant question"

    # message.content at this point is: !flow user_query_here
    query = message.content[6:].strip().lower()
    if not query:
        await client.send_message(message.channel, no_results)
        return

    try:
        # Passing the query via ``params`` lets requests URL-encode it, so
        # characters such as '#', '&' or '+' survive (e.g. "c#", "c++").
        search_page = _fetch_soup(
            _SEARCH_URL, params={'q': query + ' site:stackoverflow.com'})
        links = _extract_question_links(search_page)
    except requests.RequestException:
        links = []
    if not links:
        await client.send_message(message.channel, no_results)
        return

    # Post bodies per result index, so each page is downloaded only once.
    posts_cache = {}

    def posts_for(index):
        """Return the post texts of result *index* (question first, then answers)."""
        if index not in posts_cache:
            try:
                soup = _fetch_soup(links[index])
            except requests.RequestException:
                # Not cached, so a later command can retry the download.
                return []
            posts_cache[index] = [
                post.text for post in soup.find_all(class_='post-text')]
        return posts_cache[index]

    async def send(text):
        """Send *text* to the channel of the most recently received message."""
        await client.send_message(message.channel, text)

    async def send_post(header, text):
        """Send *header* and *text*, splitting the text if it is too long."""
        if len(header) + len(text) <= _DISCORD_LIMIT:
            await send(header + text)
            return
        await send(header)
        for part in _chunks(text, _CHUNK_SIZE):
            await send(part)

    async def show_question(index):
        """Post the URL and the question text of result *index*."""
        await send('URL ' + '<' + links[index] + '>')
        posts = posts_for(index)
        if not posts:
            await send(no_results)
            return
        await send_post(
            '.\n\n__**Question**__ ' + '**' + str(index + 1) + '**', posts[0])

    async def show_answer(index, number, lead=''):
        """Post answer *number* of result *index*; return False if it does not exist."""
        posts = posts_for(index)
        if number >= len(posts):
            await send(no_more_answers)
            return False
        await send_post(
            lead + '__**Answer**__ ' + '**' + str(number) + '**', posts[number])
        await send(hint)
        return True

    page = 0
    answer = 1
    await show_question(page)
    await show_answer(page, answer, lead='.\n\n\n\n')

    # Let the user scroll through all answers of a question and move on to
    # the next search result, until they stop or the results run out.
    while True:
        # waits for user input
        message = await client.wait_for_message(author=message.author)
        choice = message.content
        if '!f' not in choice:
            await send('Wrong command.')
        elif 'stop' in choice:
            break
        elif 'na' in choice:
            # Only advance the counter when that answer really exists.
            if await show_answer(page, answer + 1):
                answer += 1
        elif 'nq' in choice:
            if page + 1 >= len(links):
                break
            page += 1
            answer = 1
            await show_question(page)
            await show_answer(page, answer)
    await send('No more questions :( ')