From 25ce945ba88f229302db36bf2ac2a86d81b7b0fd Mon Sep 17 00:00:00 2001
From: "Joshua Jose (josej14)"
Date: Thu, 21 Nov 2019 11:04:53 +0000
Subject: [PATCH] Add files via upload

---
 Wikipedia.py     | 41 ++++++++++++++++++++++
 weather.py       | 30 ++++++++++++++++
 website links.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 160 insertions(+)
 create mode 100644 Wikipedia.py
 create mode 100644 weather.py
 create mode 100644 website links.py

diff --git a/Wikipedia.py b/Wikipedia.py
new file mode 100644
index 0000000..61e8c93
--- /dev/null
+++ b/Wikipedia.py
@@ -0,0 +1,41 @@
+# This whole code was copied from https://www.thepythoncode.com/article/access-wikipedia-python?fbclid=IwAR0oEeD19gpwYc99fvv0bPkWOHHZOP1Hk_j8JMw0J07Z6taDBGhRiZJ8uZ8
+
+import wikipedia
+# print the summary of what Python is
+print(wikipedia.summary("Python Programming Language"))
+
+# search for a term
+result = wikipedia.search("Neural networks")
+print("Result of search for 'Neural networks':", result)
+
+# get the page: Neural network
+page = wikipedia.page(result[0])
+
+# get the title of the page
+title = page.title
+
+# get the categories of the page
+categories = page.categories
+
+# get the whole wikipedia page text (content)
+content = page.content
+
+# get all the links in the page
+links = page.links
+
+# get the page references
+references = page.references
+
+# get the page summary
+summary = page.summary
+
+# print info
+print("Page content:\n", content, "\n")
+print("Page title:", title, "\n")
+print("Categories:", categories, "\n")
+print("Links:", links, "\n")
+print("References:", references, "\n")
+print("Summary:", summary, "\n")
+
+
+
diff --git a/weather.py b/weather.py
new file mode 100644
index 0000000..c3b144a
--- /dev/null
+++ b/weather.py
@@ -0,0 +1,30 @@
+# This whole section of code is the weather feature
+# code copied from: https://www.youtube.com/watch?v=gOWm5rF_qdc&list=PLtN7kQKfzoJOJYt_yhn_xHWsORCMsEOXq&index=4&t=194s
+
+# pyowm is a module
+# It needs to be installed first (pip install pyowm) for the code to run
+import pyowm
+
+# The API key within the brackets was obtained from: https://openweathermap.org/
+owm = pyowm.OWM('7050f6a47e2d8369b80ae88aafb6c357')
+
+# The user inputs the name of the location
+# The variable 'inputLoc' was created on my own
+inputLoc = input("tell me your location: ")
+location = owm.weather_at_place(inputLoc)
+weather = location.get_weather()
+
+temp = weather.get_temperature('celsius')
+humidity = weather.get_humidity()
+
+# print the temperature (in Celsius) and humidity of the chosen location
+print(temp)
+print(humidity)
+
+
+
+
+
+
+
+
diff --git a/website links.py b/website links.py
new file mode 100644
index 0000000..a06a795
--- /dev/null
+++ b/website links.py
@@ -0,0 +1,89 @@
+# Whole code copied from https://www.thepythoncode.com/article/extract-all-website-links-python?fbclid=IwAR1v_yosVk4OIlyWkJgbwqO9HRfEnmdOzNySP8HFogJF5PJ0gM9tmQMwvGA
+
+import requests
+from urllib.parse import urlparse, urljoin
+from bs4 import BeautifulSoup
+import colorama
+
+# init the colorama module
+colorama.init()
+GREEN = colorama.Fore.GREEN
+GRAY = colorama.Fore.LIGHTBLACK_EX
+RESET = colorama.Fore.RESET
+
+# initialize the sets of links (unique links)
+internal_urls = set()
+external_urls = set()
+
+def is_valid(url):
+    """
+    Checks whether 'url' is a valid URL.
+    """
+    parsed = urlparse(url)
+    return bool(parsed.netloc) and bool(parsed.scheme)
+
+def get_all_website_links(url):
+    """
+    Returns all URLs that are found on 'url' and belong to the same website
+    """
+
+    # all URLs of 'url'
+    urls = set()
+    # domain name of the URL without the protocol
+    domain_name = urlparse(url).netloc
+    soup = BeautifulSoup(requests.get(url).content, "html.parser")
+
+    for a_tag in soup.findAll("a"):
+        href = a_tag.attrs.get("href")
+        if href == "" or href is None:
+            # href is an empty tag
+            continue
+
+        # join the URL if it's relative (not an absolute link)
+        href = urljoin(url, href)
+
+        parsed_href = urlparse(href)
+
+        # remove URL GET parameters, URL fragments, etc.
+        href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
+
+        if not is_valid(href):
+            # not a valid URL
+            continue
+        if href in internal_urls:
+            # already in the set
+            continue
+        if domain_name not in href:
+            # external link
+            if href not in external_urls:
+                print(f"{GRAY}[!] External link: {href}{RESET}")
+                external_urls.add(href)
+            continue
+        print(f"{GREEN}[*] Internal link: {href}{RESET}")
+        urls.add(href)
+        internal_urls.add(href)
+    return urls
+
+# number of urls visited so far will be stored here
+total_urls_visited = 0
+
+def crawl(url, max_urls=50):
+    """
+    Crawls a web page and extracts all links.
+    You'll find all links in the 'external_urls' and 'internal_urls' global set variables.
+    params:
+        max_urls (int): maximum number of URLs to crawl, default is 50.
+
+    """
+    global total_urls_visited
+    total_urls_visited += 1
+    links = get_all_website_links(url)
+    for link in links:
+        if total_urls_visited > max_urls:
+            break
+        crawl(link, max_urls=max_urls)
+
+if __name__ == "__main__":
+    crawl("https://www.thepythoncode.com")
+    print("[+] Total internal links:", len(internal_urls))
+    print("[+] Total external links:", len(external_urls))
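
Note on weather.py: the patch embeds a live OpenWeatherMap API key directly in the source. Below is a minimal sketch of the same lookup reading the key from an environment variable instead; the variable name OWM_API_KEY is an assumption (nothing in these files defines it), and the pyowm calls are the same 2.x-style ones weather.py already uses.

# sketch: keep the OpenWeatherMap key out of the source
# assumes OWM_API_KEY has been set in the shell, e.g. export OWM_API_KEY=your-key-here
import os

import pyowm

api_key = os.environ.get("OWM_API_KEY")
if not api_key:
    raise SystemExit("OWM_API_KEY is not set")

owm = pyowm.OWM(api_key)
observation = owm.weather_at_place(input("tell me your location: "))
weather = observation.get_weather()
print(weather.get_temperature('celsius'))
print(weather.get_humidity())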