# parser.py

"""
    reMarkable:  Convert markdown to docs using Templates
    Copyright (C) 2020 Dan Goldsmith (djgoldsmith@googlemail.com)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

import logging
import re

import yaml

import remarkable.section as section

class MarkdownParser():
    """
    Parser for Markdown input files

    The input is split into an optional YAML header (delimited by ``---``
    lines at the top of the document) and a Markdown body.  The body is
    divided into sections, one per top level (``# ``) heading.

    Attributes filled in by parseFile():
      header:       parsed YAML header, None when the file has no header
      sections:     dict of section key -> section.Section object,
                    None when the file has no body
      sectionOrder: section keys in the order they appear in the file
    """

    #Marks annotation in a heading, for example "[10]" or "[10/20]".
    #Compiled once here rather than on every heading that is parsed.
    _MARKS_RE = re.compile(r"\[\s?(\d+)\s?/?(\s?\d+)?\s?\]")

    def __init__(self, theFile):
        """Create a markdown parser

        @param theFile: Path of the file to use as input
        """
        self.log = logging.getLogger("PARSER")
        self.theFile = theFile
        self.log.debug("Create Parser For: %s", self.theFile)

        #Closed again by parseFile() once the contents have been read
        self.fd = open(theFile, "r", encoding="utf-8")

        #And things we are storing
        self.sections = None
        self.header = None
        #Add this for the generic template
        self.sectionOrder = []

    def _parseSectionHeader(self, line):
        """Helper function to parse the Heading line for a section

        The heading may carry a marks annotation in square brackets, either
        ``[marks]`` or ``[marks/maxMarks]``.  The annotation is removed from
        the returned text.

        @param line: Heading line to parse
        @return: tuple of (text, key, marks, maxMarks).  text is the heading
                 without hashes or marks, key is text with spaces replaced
                 by underscores, marks / maxMarks are ints or None if absent
        """
        #Remove the hashes at either end, then whitespace / newline
        cleanLine = line.strip("#").strip()
        marks = None
        maxMarks = None

        theMatch = self._MARKS_RE.search(cleanLine)
        if theMatch:
            marks = int(theMatch[1])
            if theMatch[2]:
                #int() copes with the optional leading space in the group
                maxMarks = int(theMatch[2])

            #And delete the marks part
            cleanLine = self._MARKS_RE.sub("", cleanLine).strip()

        #We also want to replace spaces with underscores
        dictLine = cleanLine.replace(" ", "_")
        self.log.debug("--> Section '%s' (Marks %s Max %s) ",
                       dictLine, marks, maxMarks)
        return (cleanLine, dictLine, marks, maxMarks)

    def _splitHeader(self):
        """
        Break the input file into a Header and body

        We take the standard MD approach of using a YAML header in the file.
        The header is delimited by ```---``` and should be at the TOP of the
        document: it is only recognised when the first non blank line starts
        with ```---```, and it ends at the next line that does.  Any later
        ```---``` line (a horizontal rule, for example) stays in the body.

        @return (header, body): header is a string, or None if there is no
                header.  body is a list of lines with trailing whitespace
                removed (empty if there is no body)
        """

        self.log.debug("Splitting File")

        header = []
        body = []

        #"start" until the first non blank line, then "header" or "body"
        state = "start"

        for line in self.fd:
            theLine = line.rstrip()
            isDelimiter = theLine.startswith("---")
            if state == "start":
                if isDelimiter:
                    state = "header"
                    continue
                if theLine:
                    state = "body"
                body.append(theLine)
            elif state == "header":
                if isDelimiter:
                    #Closing delimiter, everything after this is body
                    state = "body"
                else:
                    header.append(theLine)
            else:
                body.append(theLine)

        #Put the Header together
        outHeader = "\n".join(header) if header else None

        #Drop the blank line that normally follows the header.  The body can
        #be empty (empty file, or a file holding only a header)
        if body and body[0] == "":
            outBody = body[1:]
        else:
            outBody = body

        return outHeader, outBody

    def _parseHeader(self, theHeader):
        """
        Parse any YAML header in the file

        This will take a string representing the YAML header,
        parse it and update the self.header variable with its contents.

        If the header has a "marks" mapping, its values are summed (ignoring
        any key that lower-cases to "total") and stored as marks["total"].
        A warning is logged if a supplied total differs from the sum.

        @param theHeader:  Header section
        @return: True if successfull
        """

        self.header = yaml.safe_load(theHeader)
        self.log.debug("Header Items %s", self.header)

        #safe_load gives None for a header holding only blanks / comments,
        #and may give a list or scalar.  Only a mapping can hold marks
        if not isinstance(self.header, dict):
            return True

        theMarks = self.header.get("marks")
        if not theMarks:
            return True
        if not isinstance(theMarks, dict):
            self.log.warning("Header 'marks' is not a mapping, no total calculated")
            return True

        #Now we add something for Total Marks, excluding any supplied total
        totalMarks = sum(value for key, value in theMarks.items()
                         if str(key).lower() != "total")

        #And for Errors
        if "total" in theMarks and totalMarks != theMarks["total"]:
            self.log.warning("Marks Mismatch.  Provided %s Calculated %s",
                             theMarks["total"], totalMarks)
        theMarks["total"] = totalMarks

        return True

    def _storeSection(self, sections, key, lines, headerText, marks, maxMarks):
        """Build a Section from the collected lines and record it under key"""
        self.log.debug("Body Text %s", lines[:3])
        sections[key] = section.Section(lines,
                                        marks=marks,
                                        header=headerText,
                                        maxMarks=maxMarks)
        self.sectionOrder.append(key)

    def _parseBody(self, theText):
        """
        Parse the text portion of the file

        This takes the markdown text in the file, and breaks it into
        sections based on the top level ("# ") headings.  The result is
        stored in self.sections and self.sectionOrder.

        Text that appears before the first heading is not part of any
        section and is dropped.

        @param theText: List of body lines
        @return: True
        """

        currentSection = None
        headerText = None
        currentMarks = None
        maxMarks = None
        out = []
        sections = {}
        for line in theText:
            if line.startswith("# "): #Kludgy Space for matching
                if currentSection:
                    self._storeSection(sections, currentSection, out,
                                       headerText, currentMarks, maxMarks)
                #And update our Current section
                headerText, currentSection, currentMarks, maxMarks = self._parseSectionHeader(line)
                out = []
            else:
                out.append(line.rstrip()) #Also remove newlines

        #We also need to store the final section, if there was a heading
        if currentSection is not None:
            self._storeSection(sections, currentSection, out,
                               headerText, currentMarks, maxMarks)
        else:
            self.log.warning("No top level headings found, body has no sections")

        #And Store the sections where they belong
        self.sections = sections
        return True

    def parseFile(self):
        """ Parse the input file

        This function will split the input file into the Header and Body.
        Then parse each of them.  The input file is closed once it has been
        read, so this can only be called once per parser.

        @return: True
        """

        try:
            header, body = self._splitHeader()
        finally:
            #The contents are in memory now, do not leak the handle
            self.fd.close()

        #Parse the Header if it exists
        if header:
            self._parseHeader(header)
        #Parse the Body if it exists
        if body:
            self._parseBody(body)

        return True
	"""
	reMarkable: Convert markdown to docs using Templates
	Copyright (C) 2020 Dan Goldsmith (djgoldsmith@googlemail.com)

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <https://www.gnu.org/licenses/>.
	"""

	import logging
	import re

	import yaml

	import remarkable.section as section

	class MarkdownParser():
		"""
		Parser for Markdown input files

		The input is split into an optional YAML header (delimited by ``---``
		lines at the top of the document) and a Markdown body. The body is
		divided into sections, one per top level (``# ``) heading.

		Attributes filled in by parseFile():
		header: parsed YAML header, None when the file has no header
		sections: dict of section key -> section.Section object,
		None when the file has no body
		sectionOrder: section keys in the order they appear in the file
		"""

		#Marks annotation in a heading, for example "[10]" or "[10/20]".
		#Compiled once here rather than on every heading that is parsed.
		_MARKS_RE = re.compile(r"\[\s?(\d+)\s?/?(\s?\d+)?\s?\]")

		def __init__(self, theFile):
			"""Create a markdown parser

			@param theFile: Path of the file to use as input
			"""
			self.log = logging.getLogger("PARSER")
			self.theFile = theFile
			self.log.debug("Create Parser For: %s", self.theFile)

			#Closed again by parseFile() once the contents have been read
			self.fd = open(theFile, "r", encoding="utf-8")

			#And things we are storing
			self.sections = None
			self.header = None
			#Add this for the generic template
			self.sectionOrder = []

		def _parseSectionHeader(self, line):
			"""Helper function to parse the Heading line for a section

			The heading may carry a marks annotation in square brackets, either
			``[marks]`` or ``[marks/maxMarks]``. The annotation is removed from
			the returned text.

			@param line: Heading line to parse
			@return: tuple of (text, key, marks, maxMarks). text is the heading
			without hashes or marks, key is text with spaces replaced by
			underscores, marks / maxMarks are ints or None if absent
			"""
			#Remove the hashes at either end, then whitespace / newline
			cleanLine = line.strip("#").strip()
			marks = None
			maxMarks = None

			theMatch = self._MARKS_RE.search(cleanLine)
			if theMatch:
				marks = int(theMatch[1])
				if theMatch[2]:
					#int() copes with the optional leading space in the group
					maxMarks = int(theMatch[2])

				#And delete the marks part
				cleanLine = self._MARKS_RE.sub("", cleanLine).strip()

			#We also want to replace spaces with underscores
			dictLine = cleanLine.replace(" ", "_")
			self.log.debug("--> Section '%s' (Marks %s Max %s) ",
				dictLine, marks, maxMarks)
			return (cleanLine, dictLine, marks, maxMarks)

		def _splitHeader(self):
			"""
			Break the input file into a Header and body

			We take the standard MD approach of using a YAML header in the file.
			The header is delimited by ```---``` and should be at the TOP of the
			document: it is only recognised when the first non blank line starts
			with ```---```, and it ends at the next line that does. Any later
			```---``` line (a horizontal rule, for example) stays in the body.

			@return (header, body): header is a string, or None if there is no
			header. body is a list of lines with trailing whitespace removed
			(empty if there is no body)
			"""

			self.log.debug("Splitting File")

			header = []
			body = []

			#"start" until the first non blank line, then "header" or "body"
			state = "start"

			for line in self.fd:
				theLine = line.rstrip()
				isDelimiter = theLine.startswith("---")
				if state == "start":
					if isDelimiter:
						state = "header"
						continue
					if theLine:
						state = "body"
					body.append(theLine)
				elif state == "header":
					if isDelimiter:
						#Closing delimiter, everything after this is body
						state = "body"
					else:
						header.append(theLine)
				else:
					body.append(theLine)

			#Put the Header together
			outHeader = "\n".join(header) if header else None

			#Drop the blank line that normally follows the header. The body can
			#be empty (empty file, or a file holding only a header)
			if body and body[0] == "":
				outBody = body[1:]
			else:
				outBody = body

			return outHeader, outBody

		def _parseHeader(self, theHeader):
			"""
			Parse any YAML header in the file

			This will take a string representing the YAML header,
			parse it and update the self.header variable with its contents.

			If the header has a "marks" mapping, its values are summed (ignoring
			any key that lower-cases to "total") and stored as marks["total"].
			A warning is logged if a supplied total differs from the sum.

			@param theHeader: Header section
			@return: True if successfull
			"""

			self.header = yaml.safe_load(theHeader)
			self.log.debug("Header Items %s", self.header)

			#safe_load gives None for a header holding only blanks / comments,
			#and may give a list or scalar. Only a mapping can hold marks
			if not isinstance(self.header, dict):
				return True

			theMarks = self.header.get("marks")
			if not theMarks:
				return True
			if not isinstance(theMarks, dict):
				self.log.warning("Header 'marks' is not a mapping, no total calculated")
				return True

			#Now we add something for Total Marks, excluding any supplied total
			totalMarks = sum(value for key, value in theMarks.items()
				if str(key).lower() != "total")

			#And for Errors
			if "total" in theMarks and totalMarks != theMarks["total"]:
				self.log.warning("Marks Mismatch. Provided %s Calculated %s",
					theMarks["total"], totalMarks)
			theMarks["total"] = totalMarks

			return True

		def _storeSection(self, sections, key, lines, headerText, marks, maxMarks):
			"""Build a Section from the collected lines and record it under key"""
			self.log.debug("Body Text %s", lines[:3])
			sections[key] = section.Section(lines,
				marks=marks,
				header=headerText,
				maxMarks=maxMarks)
			self.sectionOrder.append(key)

		def _parseBody(self, theText):
			"""
			Parse the text portion of the file

			This takes the markdown text in the file, and breaks it into
			sections based on the top level ("# ") headings. The result is
			stored in self.sections and self.sectionOrder.

			Text that appears before the first heading is not part of any
			section and is dropped.

			@param theText: List of body lines
			@return: True
			"""

			currentSection = None
			headerText = None
			currentMarks = None
			maxMarks = None
			out = []
			sections = {}
			for line in theText:
				if line.startswith("# "): #Kludgy Space for matching
					if currentSection:
						self._storeSection(sections, currentSection, out,
							headerText, currentMarks, maxMarks)
					#And update our Current section
					headerText, currentSection, currentMarks, maxMarks = self._parseSectionHeader(line)
					out = []
				else:
					out.append(line.rstrip()) #Also remove newlines

			#We also need to store the final section, if there was a heading
			if currentSection is not None:
				self._storeSection(sections, currentSection, out,
					headerText, currentMarks, maxMarks)
			else:
				self.log.warning("No top level headings found, body has no sections")

			#And Store the sections where they belong
			self.sections = sections
			return True

		def parseFile(self):
			""" Parse the input file

			This function will split the input file into the Header and Body.
			Then parse each of them. The input file is closed once it has been
			read, so this can only be called once per parser.

			@return: True
			"""

			try:
				header, body = self._splitHeader()
			finally:
				#The contents are in memory now, do not leak the handle
				self.fd.close()

			#Parse the Header if it exists
			if header:
				self._parseHeader(header)
			#Parse the Body if it exists
			if body:
				self._parseBody(body)

			return True