referencing
czarniek committed Nov 24, 2018
1 parent 887484b commit d04929e2340fb3d93a648363bf4d188cfae4c3d0
Showing 4 changed files with 111 additions and 28 deletions.
@@ -1,14 +1,20 @@
import requests
import random
import time
import discord

"""
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- discord.py (https://github.com/Rapptz/discord.py/) by Rapptz used to connect to Discord.
"""


async def hangman(client, message):


#GAME SETUP


#Creating a list of random words from the url
word_url = "http://svnweb.freebsd.org/csrg/share/dict/words?view=co&content-type=text/plain"
response = requests.get(word_url)
words = response.content.decode().splitlines()
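For context, a minimal sketch of how a fetched list like this typically becomes a hangman puzzle, reusing word_url from above; pick_word is an illustrative name, not part of this commit:

import random
import requests

def pick_word(url=word_url):
    """Fetch the plain-text dictionary and return one random lowercase word."""
    words = requests.get(url).content.decode().splitlines()
    return random.choice(words).lower()

word = pick_word()
wordEmpty = ['__ __' for _ in word]  # one blank per letter, as the loop above builds wordEmpty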
@@ -27,11 +33,6 @@ async def hangman(client, message):
wordEmpty.append('__ __')

# GAME SCREEN

scoreNum = 0
@@ -1,18 +1,38 @@
import requests
from bs4 import BeautifulSoup

"""
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- bs4 (https://www.crummy.com/software/BeautifulSoup/) by Leonard Richardson for parsing HTML pages
"""

"""
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
"""

async def stackoverflow(client, message):

#message.content at this point is: !flow user_query_here
url = message.content[6:]
url = url.lower().split(' ')
url = "+".join(url)
url = 'https://www.google.com/search?q=' + url + '+site%3Astackoverflow.com'
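The same search URL can be built with urllib's quoting, which also escapes characters the manual join misses; a sketch, not this commit's approach:

from urllib.parse import quote_plus

query = message.content[6:]
url = 'https://www.google.com/search?q=' + quote_plus(query + ' site:stackoverflow.com')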


#preventing Google from blocking bots
#########################################################
# Based on answer provided by alecxe (https://stackoverflow.com/questions/27652543/how-to-use-python-requests-to-fake-a-browser-visit)

headers = {
'User-Agent': 'Mozilla/5.0'
# if above is not working, try one of these:
#'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0'
#'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'
#'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
@@ -21,6 +41,9 @@ async def stackoverflow(client, message):
def parsePage(url):
"""take url as a string and parse it"""
page = requests.get(url, headers=headers)

#########################################################

soup = BeautifulSoup(page.text, 'html.parser')
return soup

@@ -37,36 +60,39 @@ async def stackoverflow(client, message):
else:
delete.append(badLink)

#search for google search results and put them in 'cleanLinks' list
searches = soup.find_all('a')

x=0
cleanLinks = []
for link in searches:
Flink = link['href']
Flink = Flink[7:]

#filtering out links we aren't targeting
if 'questions' in Flink:
if 'webcache' in Flink:
continue
elif 'meta' in Flink:
continue
else:
cleanLinks.append(Flink)
for match in delete:
if match in Flink:
cleanLinks.remove(Flink)
else:
continue

else:
continue
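The Flink[7:] slice above drops Google's '/url?q=' redirect prefix by character count; a more robust variant (a sketch, not this repo's code) parses the redirect explicitly:

from urllib.parse import urlparse, parse_qs

def clean_google_link(href):
    """Return the real target of a Google '/url?q=...' redirect, or href unchanged."""
    if href.startswith('/url?'):
        return parse_qs(urlparse(href).query).get('q', [href])[0]
    return href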

def postPage(result):
"""take in url as string and find a post on stackoverflow"""
soup1 = parsePage(good[result])
"""take in url (meant to be from stackoverlow.com) as string and return a post by user on stackoverflow"""
soup1 = parsePage(cleanLinks[result])
post = soup1.find_all(class_='post-text')
return post

#This function is needed due to Discord's message limit of 2000 characters
def chunks(s, n):
"""output string s in n chunks"""
chunkList = []
@@ -88,15 +114,13 @@ async def stackoverflow(client, message):
return chunkList
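The hunk above elides the body of chunks; assuming n is the chunk size (consistent with the 2000-character limit mentioned in the comment), a minimal sketch is:

def chunks(s, n):
    """Split string s into pieces of at most n characters."""
    return [s[i:i + n] for i in range(0, len(s), n)]

# e.g. chunks(post_text, 1990) keeps every message safely under Discord's 2000-character cap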


#print first question
page = 0
answer = 1
try:
await client.send_message(message.channel, 'URL ' + '<' + cleanLinks[page] + '>')
await client.send_message(message.channel, '.\n\n__**Question**__ ' + '**' + str(1) + '**' + postPage(page)[0].text)
except:
#in case the question is longer than Discord's message limit, which is 2000 characters
@@ -129,7 +153,7 @@ async def stackoverflow(client, message):


#this loop allows the user to scroll through all answers on the stackoverflow page, and gives an option to move to the next page
while page < (len(cleanLinks)-1):
# waits for user input
message = await client.wait_for_message(author=message.author)
choice = message.content
@@ -165,7 +189,7 @@ async def stackoverflow(client, message):
page = page + 1
print(page)
try:
await client.send_message(message.channel, 'URL ' + '<' + cleanLinks[page] + '>')
await client.send_message(message.channel, '.\n\n__**Question**__ ' + '**' + str(page + 1) + '**' + postPage(page)[0].text)
except:
# in case the question is longer than Discord's message limit, which is 2000 characters
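The except body is elided here as well; presumably it sends the question in pieces, along these lines (a sketch assuming the chunks helper above):

for part in chunks(postPage(page)[0].text, 1990):
    await client.send_message(message.channel, part)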
@@ -2,13 +2,36 @@ from googletrans import Translator
from bs4 import BeautifulSoup
import requests

"""
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- bs4 (https://www.crummy.com/software/BeautifulSoup/) by Leonard Richardson for parsing HTML pages
- googletrans (https://pypi.org/project/googletrans/) by SuHun Han for translating
"""

"""
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
"""

async def translate(client, message):

def parsePage(url):
"""take url as a string and parse it"""

######################################################
# Based on Lisa Tagliaferri's tutorial

page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

#####################################################

return soup


@@ -38,7 +61,12 @@ async def translate(client, message):

query = " ".join(queryList)

####################################################
# Based on example from: https://pypi.org/project/googletrans/

translator = Translator()
result = translator.translate(query, dest=toLan)

####################################################

await client.send_message(message.channel, 'Translation: ' + result.text)
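For reference, the googletrans result object also carries the detected source language; a short usage sketch in the spirit of the library's PyPI example:

from googletrans import Translator

translator = Translator()
result = translator.translate('Dzień dobry', dest='en')
print(result.src, '->', result.dest, ':', result.text)  # e.g. pl -> en : Good morning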
@@ -9,9 +9,19 @@ Libraries:
- discord.py (https://github.com/Rapptz/discord.py/) by Rapptz used to connect to Discord.
"""

"""
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
"""

async def wikipedia(client, message):

#message.content at this point is: !wiki user_query
title = message.content[6:]
title = title.lower().split(' ')
title = "_".join(title)
@@ -20,13 +30,19 @@ async def wikipedia(client, message):


def pageContentFun(url):
"""Take URL as string and parse its body"""
"""Take URL as string and return its parsed body"""

######################################################
# Based on Lisa Tagliaferri's tutorial

page = requests.get(url)

soup = BeautifulSoup(page.text, 'html.parser')

x = soup.find(class_='mw-parser-output')

######################################################

#deletes parts of the html that sometimes make the function find the wrong body
deleteEmpty = x.find_all(class_='mw-empty-elt')
for empty in deleteEmpty:
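The loop body is elided by the next hunk; with bs4 the usual call here is decompose(), so the cleanup plausibly reads:

for empty in deleteEmpty:
    empty.decompose()  # drop the placeholder node so the first real paragraph is found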
@@ -43,16 +59,24 @@ async def wikipedia(client, message):

def findImage(url):
"""Take url as string and find first img in body and return its url"""

######################################################
# Based on Lisa Tagliaferri's tutorial

page = requests.get(url)

soup = BeautifulSoup(page.text, 'html.parser')

image = soup.find(class_='image')

######################################################

img = image.find('img')

imgURL = img['src']


#specific case when the "question book" image is found instead of the target image. Delete it
if 'Question_book' in imgURL:
image.decompose()

@@ -65,10 +89,10 @@ async def wikipedia(client, message):
return imgURL
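Wikipedia img src attributes are protocol-relative (//upload.wikimedia.org/...), which is why the code later prepends 'https:'; urljoin handles both that form and plain paths (a sketch, not this commit's approach):

from urllib.parse import urljoin

imgFull = urljoin('https://en.wikipedia.org/', imgURL)  # resolves //host/... and /path/... alike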

def findP(x):
"""take site's body found earlier and look for first paragraph"""
"""take site's body found earlier and return first paragraph"""
pageContentp = x.find_all('p')


#there are some specific cases where the first paragraph isn't the one we're looking for, so take the next one
if 'Coordinates' in pageContentp[0].text:
y = pageContentp[1].text
else:
@@ -84,7 +108,7 @@ async def wikipedia(client, message):



#some wiki sites give you a few options to choose from (try searching wiki for e.g. New York). Filter those sites
if "most commonly refers to:" in cleanText:

#find part of page that is a list of options
@@ -133,20 +157,26 @@ async def wikipedia(client, message):



# cleaning text from parts like [1], [2], [note 1] often found on wikipedia
x = 1
while x < 45:
cleanText = cleanText.replace('[' + str(x) + ']', '')
x = x + 1

cleanText = cleanText.replace('( listen)', '')

y = 1
while y < 10:
cleanText = cleanText.replace('[note ' + str(y) + ']', '')
y = y + 1
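Both counting loops can be collapsed into a single regular-expression pass; a sketch (the re module is an addition, not in this commit):

import re

cleanText = re.sub(r'\[(?:note )?\d+\]', '', cleanText)  # strips [1]..[44] and [note 1]..[note 9] alike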



imgFull = 'https:' + imgURL
URLnolink = '<' + URL + '>'

#when an image is posted to discord chat, its url is posted as well.
# Creating an embed message allows posting images without the url
em = discord.Embed()
em.set_image(url=imgFull)
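Presumably the embed is then posted; in the async-branch discord.py API used throughout this commit, that would look like (a sketch):

await client.send_message(message.channel, URLnolink, embed=em)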
