Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Coding_V2/webscraper.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
123 lines (89 sloc)
2.98 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Simple program that will scrape a website and do some calculations | |
""" | |
import logging | |
import requests | |
def total(values):
    """
    Add up a collection of numbers
    @param values: Iterable of numbers to add together
    @returns: The sum of the numbers (0 when values is empty)
    """
    # Sum the items themselves. The previous loop iterated range(values),
    # which raises TypeError when values is a list.
    return sum(values)
def mean(values):
    """
    Calculate the arithmetic mean of a collection of numbers
    @param values: Sized collection (e.g. list) of numbers
    @returns: Sum of the numbers divided by how many there are
    @raises ValueError: If values is empty
    """
    count = len(values)
    if count == 0:
        # An empty collection has no mean; fail with a clear message
        # rather than an opaque ZeroDivisionError.
        raise ValueError("mean() requires at least one value")
    # The mean is sum / count (the previous code divided count by sum).
    return sum(values) / count
class Scraper:
    """
    Fetch a JSON document from a URL and summarise the user data it holds
    """

    def __init__(self, URL, timeout=10):
        """
        Create a new scraper object
        @param URL: Web address to get data from
        @param timeout: Seconds to wait for the server before giving up
        """
        self.URL = URL
        self.timeout = timeout
        self.log = logging.getLogger(__name__)
        self.log.debug("Scraper for %s created", self.URL)

    def getData(self):
        """
        This function will use the requests library and return a request object
        @returns: request object
        @raises requests.RequestException: On connection failure, timeout
                                           or an HTTP error status
        """
        self.log.debug("Fetching data")
        # Without a timeout requests will wait on a silent server forever
        r = requests.get(self.URL, timeout=self.timeout)
        # Fail here on 4xx/5xx instead of later while decoding an error page
        r.raise_for_status()
        self.log.debug(r.json())
        return r

    def parseData(self, theData):
        """
        Function to process JSON formatted data

        Each entry in theData["data"] must provide "name", "values" and
        "text". The entry is updated in place with its "sum" and "average".
        theData["checksum"] is indexed by the position of each entry and the
        value found there selects one character from that entry's "text";
        those characters are joined to form the message.

        @param theData: Data to process
        @returns: A Summary of the data (theData itself, with "running_sum",
                  "running_average" and "message" added)
        """
        self.log.debug("Processing data")
        checksum = theData["checksum"]
        self.log.debug("Checksum is %s", checksum)
        #Get the list of users
        userList = theData["data"]

        runningSum = 0
        letters = []
        #idx keeps track of where we are in the checksum
        for idx, item in enumerate(userList.values()):
            self.log.debug("--> User is %s", item["name"])
            #Calculate the sum and average value
            theSum = total(item["values"])
            theAvg = mean(item["values"])
            self.log.debug("\t Sum %d", theSum)
            self.log.debug("\t Avg %f", theAvg)
            #Get the Secret Flag
            #Indexing (not zip) so a checksum that is too short fails loudly
            checkIdx = checksum[idx]
            letters.append(item["text"][checkIdx])
            item["sum"] = theSum
            item["average"] = theAvg
            runningSum += theSum

        message = "".join(letters)
        userCount = len(userList)
        #And our Running totals
        theData["running_sum"] = runningSum
        #No users means nothing to average, avoid dividing by zero
        theData["running_average"] = runningSum / userCount if userCount else 0.0
        theData["message"] = message
        self.log.debug("Message is %s", message)
        return theData

    def run(self):
        """
        Run the program
        @returns: The summary produced by parseData for the fetched data
        """
        self.log.debug("Running")
        #Fetch the data
        data = self.getData()
        #Process it
        return self.parseData(data.json())
if __name__ == "__main__":
    # Show debug-level log output while the script runs
    logging.basicConfig(level=logging.DEBUG)

    # Fetch and summarise the published test data
    scraper = Scraper("https://github.coventry.ac.uk/pages/4061CEM-2021OCTJAN/Data/coding/testdata.json")
    summary = scraper.run()

    # Print the output: a divider line followed by the decoded message
    divider = "-" * 40
    print("{0}".format(divider))
    print("Message is {0}".format(summary["message"]))