Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
reMarkAble/remarkable/parser.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
224 lines (175 sloc)
7.19 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
reMarkable: Convert markdown to docs using Templates
Copyright (C) 2020 Dan Goldsmith (djgoldsmith@googlemail.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
""" | |
import logging | |
import re | |
import yaml | |
import remarkable.section as section | |
class MarkdownParser():
    """
    Parser for Markdown input files

    The input is split into an optional YAML header (front matter) and a
    Markdown body. The header is loaded into ``self.header``; the body is
    broken into sections keyed on top level ("# ") headings and stored in
    ``self.sections``, with the order of appearance kept in
    ``self.sectionOrder``.

    A parser instance is single use: the input file is opened by the
    constructor and closed once ``_splitHeader`` has read it.
    """

    # Marks annotation inside a heading, e.g. "[10]" or "[10/20]".
    # Group 1 is the awarded mark, group 2 the (optional) maximum.
    _MARKS_RE = re.compile(r"\[\s?(\d+)\s?/?(\s?\d+)?\s?\]")

    def __init__(self, theFile):
        """Create a markdown parser,

        @param theFile: Path of the file to use as input
        """
        self.log = logging.getLogger("PARSER")
        self.theFile = theFile
        self.log.debug("Create Parser For: %s", self.theFile)
        # Opened eagerly so that a missing / unreadable file fails at
        # construction time. _splitHeader() closes the handle after reading.
        self.fd = open(theFile, "r", encoding="utf-8")

        # And things we are storing
        self.sections = None
        self.header = None
        # Add this for the generic template
        self.sectionOrder = []

    def _parseSectionHeader(self, line):
        """Helper function to parse the Heading line for a section

        Leading / trailing "#" characters and whitespace are removed, and
        any "[marks]" or "[marks/max]" annotation is extracted and deleted
        from the heading text.

        @param line: Text to parse
        @return: tuple of (text, key, marks, maxMarks) where ``text`` is the
                 cleaned heading, ``key`` is ``text`` with spaces replaced
                 by underscores, and ``marks`` / ``maxMarks`` are ints, or
                 None when not present in the heading.
        """
        cleanLine = line.strip("#").strip()

        marks = None
        maxMarks = None
        theMatch = self._MARKS_RE.search(cleanLine)
        if theMatch:
            marks = int(theMatch.group(1))
            if theMatch.group(2):
                # int() tolerates the optional leading space in the group
                maxMarks = int(theMatch.group(2))
            # And delete the marks part
            cleanLine = self._MARKS_RE.sub("", cleanLine).strip()

        # We also want to replace spaces with underscores
        dictLine = cleanLine.replace(" ", "_")
        self.log.debug("--> Section '%s' (Marks %s Max %s) ",
                       dictLine, marks, maxMarks)
        return (cleanLine, dictLine, marks, maxMarks)

    def _splitHeader(self):
        """
        Break the input file into a Header and body

        We take the standard MD approach of using a YAML header in the file.
        The header is delimited by lines starting with ```---``` and must be
        at the TOP of the document (only blank lines may come before it).
        Once the header has been closed, or any body text has been seen,
        further ```---``` lines are ordinary body text (horizontal rules)
        and no longer switch back into header mode.

        The file handle is closed once it has been read.

        @return (header, body): header is the YAML text, or None if there is
                 no header. body is a list of right-stripped lines (empty
                 if there is no body), with a single leading blank line
                 removed.
        """
        self.log.debug("Splitting File")
        header = []
        body = []
        inHeader = False
        # True only until the header closes or real body content appears
        headerAllowed = True

        with self.fd:
            for line in self.fd:
                theLine = line.rstrip()
                isDelimiter = theLine.startswith("---")
                if inHeader:
                    if isDelimiter:
                        inHeader = False
                        headerAllowed = False
                    else:
                        header.append(theLine)
                elif isDelimiter and headerAllowed:
                    inHeader = True
                else:
                    body.append(theLine)
                    if theLine:
                        headerAllowed = False

        # Put the Header together
        outHeader = "\n".join(header) if header else None

        # Drop the blank line that conventionally follows the header.
        # Guarded so an empty or header-only file does not raise.
        if body and body[0] == "":
            outBody = body[1:]
        else:
            outBody = body
        return outHeader, outBody

    def _parseHeader(self, theHeader):
        """
        Parse any YAML header in the file

        This will take a string representing the YAML header,
        parse it and update the self.header variable with its contents.

        If the header has a non-empty "marks" mapping, its values (except any
        "total" entry) are summed and stored under ``marks["total"]``. A
        warning is logged when a supplied total disagrees with the sum.

        @param theHeader: Header section (YAML text)
        @return: True if successful, False if the YAML did not produce a
                 mapping (in which case no marks processing is done)
        """
        self.header = yaml.safe_load(theHeader)
        self.log.debug("Header Items %s", self.header)

        if not isinstance(self.header, dict):
            # e.g. a header holding only comments loads as None
            self.log.warning("Header is not a YAML mapping: %r", self.header)
            return False

        # Now we add something for Total Marks
        marks = self.header.get("marks")
        if marks and isinstance(marks, dict):
            self.log.debug("Header Marks %s", marks)
            # Exclude any provided total from the calculation
            totalMarks = 0
            for key, value in marks.items():
                # str() as YAML keys are not guaranteed to be strings
                if str(key).lower() != "total":
                    totalMarks += value

            # And for Errors
            if "total" in marks and totalMarks != marks["total"]:
                self.log.warning("Marks Mismatch. Provided %s Calculated %s",
                                 marks["total"], totalMarks)

            marks["total"] = totalMarks
        return True

    def _storeSection(self, sections, key, lines, headerText, marks, maxMarks):
        """Build a Section from the collected lines and record it

        @param sections: dict the new section is added to
        @param key: dictionary key for the section
        @param lines: body lines belonging to the section
        @param headerText: cleaned heading text
        @param marks: marks parsed from the heading (or None)
        @param maxMarks: maximum marks parsed from the heading (or None)
        """
        self.log.debug("Body Text %s", lines[:3])
        sections[key] = section.Section(lines,
                                        marks=marks,
                                        header=headerText,
                                        maxMarks=maxMarks)
        self.sectionOrder.append(key)

    def _parseBody(self, theText):
        """
        Parse the text portion of the file

        This takes the markdown text in the file, and breaks it into
        sections based on the top level ("# ") headings. Text that appears
        before the first heading is discarded; if there are no headings at
        all, no sections are stored.

        NOTE(review): any line starting with "# " counts as a heading, so
        such lines inside fenced code blocks will also start a section.

        @param theText: iterable of body lines
        @return: True
        """
        sections = {}
        currentSection = None
        headerText = None
        currentMarks = None
        maxMarks = None
        out = []

        for line in theText:
            if line.startswith("# "):  # Kludgy Space for matching
                if currentSection is not None:
                    self._storeSection(sections, currentSection, out,
                                       headerText, currentMarks, maxMarks)
                # And update our Current section
                (headerText,
                 currentSection,
                 currentMarks,
                 maxMarks) = self._parseSectionHeader(line)
                out = []
            else:
                out.append(line.rstrip())  # Also remove newlines

        # We also need to store the final section (if we ever found one)
        if currentSection is not None:
            self._storeSection(sections, currentSection, out,
                               headerText, currentMarks, maxMarks)
        else:
            self.log.debug("No top level headings found, no sections stored")

        # And Store the sections where they belong
        self.sections = sections
        return True

    def parseFile(self):
        """ Parse the input file

        This function will split the input file into the Header and Body.
        Then parse each of them, if they exist.

        @return: True
        """
        header, body = self._splitHeader()
        # Parse the Header if it exists
        if header:
            self._parseHeader(header)
        # Parse the Body if it exists
        if body:
            self._parseBody(body)
        return True