Permalink
Show file tree
Hide file tree
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
3 changed files
with
160 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Adapted from https://www.thepythoncode.com/article/access-wikipedia-python
# Demonstrates the basic `wikipedia` package API: summary, search, page lookup,
# and page attribute access.  Requires network access at runtime.

import wikipedia

# Print the summary of the "Python Programming Language" article.
print(wikipedia.summary("Python Programming Language"))

# Search for a term; returns a list of matching page titles.
result = wikipedia.search("Neural networks")
print("Result search of 'Neural networks':", result)

# Fetch the best-matching page (first search result).
page = wikipedia.page(result[0])

# Page metadata and content.
title = page.title            # article title
categories = page.categories  # list of category names
content = page.content        # whole plain-text article body
links = page.links            # titles of all articles linked from the page
references = page.references  # external reference URLs
summary = page.summary        # lead-section summary

# Print everything we collected.
print("Page content:\n", content, "\n")
print("Page title:", title, "\n")
print("Categories:", categories, "\n")
print("Links:", links, "\n")
print("References:", references, "\n")
print("Summary:", summary, "\n")
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Weather feature: asks the user for a location and prints its current
# temperature and humidity via OpenWeatherMap.
# Adapted from: https://www.youtube.com/watch?v=gOWm5rF_qdc

# pyowm is a third-party module and must be installed first (pip install pyowm).
import pyowm

# NOTE(security/review): the OpenWeatherMap API key is hard-coded and committed
# to source control — it should be loaded from an environment variable or a
# config file instead.  Key obtained from https://openweathermap.org/
own = pyowm.OWM('7050f6a47e2d8369b80ae88aafb6c357')

# The user inputs the name of the location (e.g. "London,GB").
# NOTE(review): weather_at_place()/get_weather()/get_temperature()/
# get_humidity() are the pyowm 2.x API; pyowm 3.x renamed these —
# pin pyowm<3 or update the calls.
inputLoc = input("tell me your location: ")
location = own.weather_at_place(inputLoc)
weather = location.get_weather()

temp = weather.get_temperature('celsius')  # dict of temperature readings (Celsius)
humidity = weather.get_humidity()          # relative humidity (%)

# Report the temperature (in Celsius) and humidity of the chosen location.
print(temp)
print(humidity)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# Link extractor / crawler.
# Adapted from https://www.thepythoncode.com/article/extract-all-website-links-python

import requests
# Fixed: urlparse/urljoin live in urllib.parse; importing them from
# urllib.request only worked via an undocumented re-export.
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import colorama

# Initialise colorama so ANSI colour codes also work on Windows consoles.
colorama.init()
# Fixed: the colorama class is `Fore` (not `Force`) and the colour constants
# are upper-case attributes; the original raised AttributeError.
GREEN = colorama.Fore.GREEN
GRAY = colorama.Fore.LIGHTBLACK_EX
RESET = colorama.Fore.RESET

# Sets of unique links discovered so far, shared by the functions below.
internal_urls = set()  # links on the same domain as the crawl root
external_urls = set()  # links pointing at other domains
|
||
def is_valid(url):
    """
    Return True if `url` looks like a valid absolute URL.

    A URL is considered valid when it has both a scheme (e.g. "https")
    and a network location (domain).
    """
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)
|
||
def get_all_website_links(url):
    """
    Return the set of same-domain URLs found on the page at `url`.

    Side effects: adds every discovered link to the module-level
    `internal_urls` / `external_urls` sets and prints each one, coloured
    green (internal) or gray (external).
    """
    # All same-domain URLs found on this page.
    urls = set()
    # Domain name of the URL without the protocol.
    domain_name = urlparse(url).netloc
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    for a_tag in soup.findAll("a"):
        href = a_tag.attrs.get("href")
        # Fixed: original tested `href in None`, which raises TypeError when
        # href is None; the intent is to skip missing or empty href attributes.
        if href is None or href == "":
            continue

        # Join the url if it's relative (not an absolute link).
        href = urljoin(url, href)

        # Remove URL GET parameters, fragments, etc. — keep scheme://netloc/path.
        parsed_href = urlparse(href)
        href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path

        if not is_valid(href):
            # Not a valid URL.
            continue
        if href in internal_urls:
            # Already in the set.
            continue
        if domain_name not in href:
            # External link: record and report it only once.
            if href not in external_urls:
                print(f"{GRAY}[!] External link: {href}{RESET}")
                external_urls.add(href)
            continue
        print(f"{GREEN}[*] Internal link: {href}{RESET}")
        urls.add(href)
        internal_urls.add(href)
    return urls
|
||
# Running count of pages fetched so far; incremented by crawl().
total_urls_visited = 0
|
||
def crawl(url, max_urls=50):
    """
    Recursively crawl a web page and extract all links.

    You'll find all links in the `external_urls` and `internal_urls`
    global set variables.

    params:
        url (str): page to start crawling from.
        max_urls (int): maximum number of pages to visit, default is 50.
    """
    global total_urls_visited
    total_urls_visited += 1
    links = get_all_website_links(url)
    for link in links:
        if total_urls_visited > max_urls:
            break
        crawl(link, max_urls=max_urls)
|
||
# Fixed: original used single-underscore `_name_`/`_main_` (never true) and
# `print([+])`, which is a syntax error; reconstructed the intended summary.
if __name__ == "__main__":
    crawl("https://www.thepythoncode.com")
    print("[+] Total Internal links:", len(internal_urls))
    print("[+] Total External links:", len(external_urls))
    print("[+] Total URLs:", len(external_urls) + len(internal_urls))
|