Add files via upload
josej14 committed Nov 21, 2019
1 parent 9f816d5 commit 25ce945ba88f229302db36bf2ac2a86d81b7b0fd
Showing 3 changed files with 160 additions and 0 deletions.
@@ -0,0 +1,41 @@
# This whole code was copied from https://www.thepythoncode.com/article/access-wikipedia-python?fbclid=IwAR0oEeD19gpwYc99fvv0bPkWOHHZOP1Hk_j8JMw0J07Z6taDBGhRiZJ8uZ8

import wikipedia
# print the summary of what python is
print(wikipedia.summary("Python Programming Language"))
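# (My own note, not part of the copied tutorial: summary() also accepts a
# 'sentences' argument, e.g. wikipedia.summary("Python Programming Language", sentences=2),
# which trims the output to the first two sentences.)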

# search for a term
result = wikipedia.search("Neural networks")
print("Result search of 'Neaural networks':", result)

# get the page: Neural network
page = wikipedia.page(result[0])

# get the title of the page
title = page.title

# get the categories of the page
categories = page.categories

# get the whole wikipedia page text (content)
content = page.content

# get all the links in the page
links = page.links

# get the page references
references = page.references

# get the page summary
summary = page.summary

# print info
print("Page content:\n", content, "\n")
print("Page title:", title, "\n")
print("Categories:", categories, "\n")
print("Links:", links, "\n")
print("References:", references, "\n")
print("Summary:", summary, "\n")



@@ -0,0 +1,30 @@
#This whole section of code is the weather feature
#code copied from: https://www.youtube.com/watch?v=gOWm5rF_qdc&list=PLtN7kQKfzoJOJYt_yhn_xHWsORCMsEOXq&index=4&t=194s

# pyowm is a third-party module
# It needs to be installed first (e.g. pip install pyowm) for the code to run
import pyowm

# The API key inside the brackets was obtained from: https://openweathermap.org/
own = pyowm.OWM('7050f6a47e2d8369b80ae88aafb6c357')

# The user inputs the name of the location
# The variable 'inputLoc' is my own addition
inputLoc = input("Tell me your location: ")
location = own.weather_at_place(inputLoc)
weather = location.get_weather()

temp = weather.get_temperature('celsius')
humidity = weather.get_humidity()

# print the temperature (in Celsius) and humidity of the chosen location
print(temp)
print(humidity)
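# Optional sketch (my own addition, not from the tutorial video): in pyowm 2.x
# get_temperature() returns a dict, so the current reading can be pulled out and
# printed with a label. The 'temp' key is an assumption based on that format.
print("Temperature (Celsius):", temp.get('temp'))
print("Humidity (%):", humidity)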








@@ -0,0 +1,89 @@
# Whole code copied from https://www.thepythoncode.com/article/extract-all-website-links-python?fbclid=IwAR1v_yosVk4OIlyWkJgbwqO9HRfEnmdOzNySP8HFogJF5PJ0gM9tmQMwvGA

import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import colorama

# init the colorama module
colorama.init()
GREEN = colorama.Fore.GREEN
GRAY = colorama.Fore.LIGHTBLACK_EX
RESET = colorama.Fore.RESET

# initialize the set of links (unique links)
internal_urls = set()
external_urls = set()

def is_valid(url):
    """
    Checks whether 'url' is a valid URL.
    """
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)
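# Quick illustration (my own note, not from the copied article): for
# "https://example.com/page?id=1", urlparse() returns scheme="https" and
# netloc="example.com", so is_valid() only accepts absolute URLs that carry
# both a scheme and a domain.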

def get_all_website_links(url):
    """
    Returns all URLs found on 'url' that belong to the same website.
    """
    # all URLs of 'url'
    urls = set()
    # domain name of the URL without the protocol
    domain_name = urlparse(url).netloc
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    for a_tag in soup.findAll("a"):
        href = a_tag.attrs.get("href")
        if href == "" or href is None:
            # href empty tag
            continue

        # join the URL if it's relative (not an absolute link)
        href = urljoin(url, href)

        parsed_href = urlparse(href)

        # remove URL GET parameters, URL fragments, etc.
        href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path

        if not is_valid(href):
            # not a valid URL
            continue
        if href in internal_urls:
            # already in the set
            continue
        if domain_name not in href:
            # external link
            if href not in external_urls:
                print(f"{GRAY}[!] External link: {href}{RESET}")
                external_urls.add(href)
            continue
        print(f"{GREEN}[*] Internal link: {href}{RESET}")
        urls.add(href)
        internal_urls.add(href)
    return urls

# number of urls visited so far will be stored here
total_urls_visited = 0

def crawl(url, max_urls=50):
    """
    Crawls a web page and extracts all links.
    You'll find all links in the 'external_urls' and 'internal_urls' global set variables.
    params:
        max_urls (int): maximum number of URLs to crawl, default is 50.
    """
    global total_urls_visited
    total_urls_visited += 1
    links = get_all_website_links(url)
    for link in links:
        if total_urls_visited > max_urls:
            break
        crawl(link, max_urls=max_urls)

if __name__ == "__main__":
    crawl("https://www.thepythoncode.com")
    print("[+] Total internal links:", len(internal_urls))
    print("[+] Total external links:", len(external_urls))
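    # Optional sketch (my own addition, not from the copied article): dump the two
    # global sets to text files so the crawl results survive the run. The file
    # names below are assumptions chosen for illustration.
    with open("internal_links.txt", "w") as f:
        for link in internal_urls:
            f.write(link + "\n")
    with open("external_links.txt", "w") as f:
        for link in external_urls:
            f.write(link + "\n")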
