referencing
czarniek committed Nov 24, 2018
1 parent 887484b commit d04929e2340fb3d93a648363bf4d188cfae4c3d0
Showing 4 changed files with 111 additions and 28 deletions.
@@ -1,14 +1,20 @@
import requests
import random
import time
import discord

"""
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- discord.py (https://github.com/Rapptz/discord.py/) by Rapptz used to connect to Discord.
"""


async def hangman(client, message):


#GAME SETUP


#Creating a list of random words from the url
word_url = "http://svnweb.freebsd.org/csrg/share/dict/words?view=co&content-type=text/plain"
response = requests.get(word_url)
words = response.content.decode().splitlines()
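For context, a minimal sketch of how a fetched list like this typically becomes a hangman puzzle, reusing word_url from above; pick_word is an illustrative name, not part of this commit:

import random
import requests

def pick_word(url=word_url):
    """Fetch the plain-text dictionary and return one random lowercase word."""
    words = requests.get(url).content.decode().splitlines()
    return random.choice(words).lower()

word = pick_word()
wordEmpty = ['__ __' for _ in word]  # one blank per letter, as the loop above builds wordEmpty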
@@ -27,11 +33,6 @@ async def hangman(client, message):
wordEmpty.append('__ __')

# GAME SCREEN

scoreNum = 0
@@ -1,18 +1,38 @@
import requests
from bs4 import BeautifulSoup

"""
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- bs4 (https://www.crummy.com/software/BeautifulSoup/) by Leonard Richardson for parsing HTML pages
"""

"""
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
"""

async def stackoverflow(client, message):

#message.content at this point is: !flow user_query_here
url = message.content[6:]
url = url.lower().split(' ')
url = "+".join(url)
url = 'https://www.google.com/search?q=' + url + '+site%3Astackoverflow.com'
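The same search URL can be built with urllib's quoting, which also escapes characters the manual join misses; a sketch, not this commit's approach:

from urllib.parse import quote_plus

query = message.content[6:]
url = 'https://www.google.com/search?q=' + quote_plus(query + ' site:stackoverflow.com')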


#preventing Google from blocking bots
#########################################################
# Based on answer provided by alecxe (https://stackoverflow.com/questions/27652543/how-to-use-python-requests-to-fake-a-browser-visit)

headers = {
'User-Agent': 'Mozilla/5.0'
# if above is not working, try one of these:
#'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0'
#'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'
#'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
@@ -21,6 +41,9 @@ async def stackoverflow(client, message):
def parsePage(url):
"""take url as a string and parse it"""
page = requests.get(url, headers=headers)

#########################################################

soup = BeautifulSoup(page.text, 'html.parser')
return soup

@@ -37,36 +60,39 @@ async def stackoverflow(client, message):
else:
delete.append(badLink)

#search for google search results and put them in 'cleanLinks' list
searches = soup.find_all('a')

x=0
cleanLinks = []
for link in searches:
Flink = link['href']
Flink = Flink[7:]

#filtering out links we aren't targeting
if 'questions' in Flink:
if 'webcache' in Flink:
continue
elif 'meta' in Flink:
continue
else:
cleanLinks.append(Flink)
for match in delete:
if match in Flink:
cleanLinks.remove(Flink)
else:
continue

else:
continue
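The Flink[7:] slice above drops Google's '/url?q=' redirect prefix by character count; a more robust variant (a sketch, not this repo's code) parses the redirect explicitly:

from urllib.parse import urlparse, parse_qs

def clean_google_link(href):
    """Return the real target of a Google '/url?q=...' redirect, or href unchanged."""
    if href.startswith('/url?'):
        return parse_qs(urlparse(href).query).get('q', [href])[0]
    return href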

def postPage(result):
"""take in url as string and find a post on stackoverflow"""
soup1 = parsePage(good[result])
"""take in url (meant to be from stackoverlow.com) as string and return a post by user on stackoverflow"""
soup1 = parsePage(cleanLinks[result])
post = soup1.find_all(class_='post-text')
return post

#This function is needed due to Discord's message limit of 2000 characters
def chunks(s, n):
"""output string s in n chunks"""
chunkList = []
@@ -88,15 +114,13 @@ async def stackoverflow(client, message):
return chunkList
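The hunk above elides the body of chunks; assuming n is the chunk size (consistent with the 2000-character limit mentioned in the comment), a minimal sketch is:

def chunks(s, n):
    """Split string s into pieces of at most n characters."""
    return [s[i:i + n] for i in range(0, len(s), n)]

# e.g. chunks(post_text, 1990) keeps every message safely under Discord's 2000-character cap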


#print first question
page = 0
answer = 1
try:
await client.send_message(message.channel, 'URL ' + '<' + cleanLinks[page] + '>')
await client.send_message(message.channel, '.\n\n__**Question**__ ' + '**' + str(1) + '**' + postPage(page)[0].text)
except:
#in case the question is longer than Discord's message limit, which is 2000 characters
@@ -129,7 +153,7 @@ async def stackoverflow(client, message):


#this loop allows the user to scroll through all answers on the stackoverflow page, and gives an option to move to the next page
while page < (len(cleanLinks)-1):
# waits for user input
message = await client.wait_for_message(author=message.author)
choice = message.content
@@ -165,7 +189,7 @@ async def stackoverflow(client, message):
page = page + 1
print(page)
try:
await client.send_message(message.channel, 'URL ' + '<' + cleanLinks[page] + '>')
await client.send_message(message.channel, '.\n\n__**Question**__ ' + '**' + str(page + 1) + '**' + postPage(page)[0].text)
except:
# in case the question is longer than Discord's message limit, which is 2000 characters
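The except body is elided here as well; presumably it sends the question in pieces, along these lines (a sketch assuming the chunks helper above):

for part in chunks(postPage(page)[0].text, 1990):
    await client.send_message(message.channel, part)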
@@ -2,13 +2,36 @@ from googletrans import Translator
from bs4 import BeautifulSoup
import requests

"""
Libraries:
- requests (http://docs.python-requests.org/en/master/) by Kenneth Reitz for web requests
- bs4 (https://www.crummy.com/software/BeautifulSoup/) by Leonard Richardson for parsing HTML pages
- googletrans (https://pypi.org/project/googletrans/) by SuHun Han for translating
"""

"""
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
"""

async def translate(client, message):

def parsePage(url):
"""take url as a string and parse it"""

######################################################
# Based on Lisa Tagliaferri's tutorial

page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')

#####################################################

return soup


@@ -38,7 +61,12 @@ async def translate(client, message):

query = " ".join(queryList)

####################################################
# Based on example from: https://pypi.org/project/googletrans/

translator = Translator()
result = translator.translate(query, dest=toLan)

####################################################

await client.send_message(message.channel, 'Translation: ' + result.text)
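For reference, the googletrans result object also carries the detected source language; a short usage sketch in the spirit of the library's PyPI example:

from googletrans import Translator

translator = Translator()
result = translator.translate('Dzień dobry', dest='en')
print(result.src, '->', result.dest, ':', result.text)  # e.g. pl -> en : Good morning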
@@ -9,9 +9,19 @@ Libraries:
- discord.py (https://github.com/Rapptz/discord.py/) by Rapptz used to connect to Discord.
"""

"""
I've learned how to use Beautiful Soup by following its documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
and this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3
by Lisa Tagliaferri
"""

async def wikipedia(client, message):

#message.content at this point is: !wiki user_query
title = message.content[6:]
title = title.lower().split(' ')
title = "_".join(title)
@@ -20,13 +30,19 @@ async def wikipedia(client, message):


def pageContentFun(url):
"""Take URL as string and parse its body"""
"""Take URL as string and return its parsed body"""

######################################################
# Based on Lisa Tagliaferri's tutorial

page = requests.get(url)

soup = BeautifulSoup(page.text, 'html.parser')

x = soup.find(class_='mw-parser-output')

######################################################

#deletes parts of the html that sometimes make the function find the wrong body
deleteEmpty = x.find_all(class_='mw-empty-elt')
for empty in deleteEmpty:
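The loop body is elided by the next hunk; with bs4 the usual call here is decompose(), so the cleanup plausibly reads:

for empty in deleteEmpty:
    empty.decompose()  # drop the placeholder node so the first real paragraph is found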
@@ -43,16 +59,24 @@ async def wikipedia(client, message):

def findImage(url):
"""Take url as string and find first img in body and return its url"""

######################################################
# Based on Lisa Tagliaferri's tutorial

page = requests.get(url)

soup = BeautifulSoup(page.text, 'html.parser')

image = soup.find(class_='image')

######################################################

img = image.find('img')

imgURL = img['src']


#specific case when the "question book" image is found instead of the target image. Delete it
if 'Question_book' in imgURL:
image.decompose()

@@ -65,10 +89,10 @@ async def wikipedia(client, message):
return imgURL
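Wikipedia img src attributes are protocol-relative (//upload.wikimedia.org/...), which is why the code later prepends 'https:'; urljoin handles both that form and plain paths (a sketch, not this commit's approach):

from urllib.parse import urljoin

imgFull = urljoin('https://en.wikipedia.org/', imgURL)  # resolves //host/... and /path/... alike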

def findP(x):
"""take site's body found earlier and look for first paragraph"""
"""take site's body found earlier and return first paragraph"""
pageContentp = x.find_all('p')


#there are some specific cases where the first paragraph isn't the one we're looking for, so take the next one
if 'Coordinates' in pageContentp[0].text:
y = pageContentp[1].text
else:
@@ -84,7 +108,7 @@ async def wikipedia(client, message):



#some wiki sites give you a few options to choose from (try searching wiki for e.g. New York). Filter those sites
if "most commonly refers to:" in cleanText:

#find part of page that is a list of options
@@ -133,20 +157,26 @@ async def wikipedia(client, message):



# cleaning text from parts like [1], [2], [note 1] often found on wikipedia
x = 1
while x < 45:
cleanText = cleanText.replace('[' + str(x) + ']', '')
x = x + 1

cleanText = cleanText.replace('( listen)', '')

y = 1
while y < 10:
cleanText = cleanText.replace('[note ' + str(y) + ']', '')
y = y + 1
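Both counting loops can be collapsed into a single regular-expression pass; a sketch (the re module is an addition, not in this commit):

import re

cleanText = re.sub(r'\[(?:note )?\d+\]', '', cleanText)  # strips [1]..[44] and [note 1]..[note 9] alike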



imgFull = 'https:' + imgURL
URLnolink = '<' + URL + '>'

#when an image is posted to discord chat, its url is posted as well.
# Creating an embed message allows posting images without the url
em = discord.Embed()
em.set_image(url=imgFull)
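Presumably the embed is then posted; in the async-branch discord.py API used throughout this commit, that would look like (a sketch):

await client.send_message(message.channel, URLnolink, embed=em)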
