mirror of https://github.com/procxx/kepka.git
				
				
				
			
		
			
				
	
	
		
			447 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			447 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
| #!/usr/bin/env python
 | |
| 
 | |
| #
 | |
| # updateDocumentToC.py
 | |
| #
 | |
| # Insert table of contents at top of Catch markdown documents.
 | |
| #
 | |
| # This script is distributed under the GNU General Public License v3.0
 | |
| #
 | |
| # It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
 | |
| # https://github.com/rasbt/markdown-toclify
 | |
| #
 | |
| 
 | |
| from  __future__  import print_function
 | |
| from scriptCommon import catchPath
 | |
| 
 | |
| import argparse
 | |
| import glob
 | |
| import os
 | |
| import re
 | |
| import sys
 | |
| 
 | |
# Configuration:

# Default for the minimum number of headings a document needs before it
# gets a table of contents (see --min-toc-entries).
minTocEntries = 4

# Heading levels that are left out of the table of contents.
headingExcludeDefault = [1, 3, 4, 5]  # by default only level 2 headings are listed
headingExcludeRelease = [2, 3, 4, 5]  # release-notes.md lists level 1 headings instead

# Glob pattern of the documents processed when no file is given.
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
releaseNotesName = 'release-notes.md'

# The table of contents starts with contentTitle on the (1-based) line
# contentLineNo; contentLineNdx is the matching 0-based list index.
contentTitle = '**Contents**'
contentLineNo = 4
contentLineNdx = contentLineNo - 1

# End configuration

# Characters that are kept when a heading is turned into an anchor id.
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
 | |
| 
 | |
def readLines(in_file):
    """Read a markdown file and return its text split into lines.

    The text is split on the newline character, so the returned strings
    carry no line terminator and a file that ends with a newline yields a
    trailing empty string.
    """
    with open(in_file, 'r') as source:
        text = source.read()
    return text.split('\n')
 | |
| 
 | |
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """Return a copy of lines without the lines that start with a given prefix.

    remove is a prefix or a tuple of prefixes as accepted by str.startswith.
    With the default value, previously generated [back to top] links and
    <a class="mk-toclify"> anchor tags are dropped. A falsy remove keeps
    every line.
    """
    if remove:
        return [line for line in lines if not line.startswith(remove)]
    return lines[:]
 | |
| 
 | |
def removeToC(lines):
    """Return a copy of lines without an existing table of contents.

    A table of contents is recognised by a line starting with contentTitle
    at index contentLineNdx. That title line, the following lines that
    start with '[' (the entries) and the single line after them are
    dropped. Documents without such a title line, including documents
    with too few lines to have one, are returned as an unmodified copy.
    """
    # Guard the index: short documents cannot contain a table of contents.
    if len(lines) <= contentLineNdx or \
            not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    # Skip the entries; the bounds check ends the scan when the table of
    # contents runs to the very end of the document.
    pos = contentLineNdx + 1
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # lines[pos] is the line that follows the entries; it is dropped too.
    return lines[:contentLineNdx] + lines[pos + 1:]
 | |
| 
 | |
def dashifyHeadline(line):
    """
    Takes a header line from a Markdown document and
    returns a list of
        the heading text without the '#' markers,
        a string version for <a id=''></a> anchor tags,
        and the level of the headline as integer.

    Leading and trailing whitespace of the line is ignored, so an indented
    heading yields the same result as a left-aligned one.

    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]

    """
    # Headings may be indented; without stripping, the leading '#' run would
    # not be found and the level would come out as 0.
    stripped_right = line.strip().rstrip('#')
    stripped_both = stripped_right.lstrip('#')
    level = len(stripped_right) - len(stripped_both)
    heading = stripped_both.strip()

    # '.' and '/' are removed outright; every other character outside
    # VALIDS is turned into a dash.
    anchor = heading.replace('.', '').replace('/', '')
    anchor = ''.join(c if c in VALIDS else '-' for c in anchor)

    anchor = anchor.lower()
    anchor = re.sub(r'-{2,}', '-', anchor)  # collapse runs of dashes
    anchor = anchor.strip('-')  # strip dashes from start and end

    # exception '&' (double-dash in github)
    anchor = anchor.replace('-&-', '--')

    # A list, not a tuple: positioningHeadlines adjusts the level in place.
    return [heading, anchor, level]
 | |
| 
 | |
def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    A line counts as a headline when it starts with one to six '#'
    followed by a space, is indented by at most three spaces and contains
    more than '#' and spaces. Every input line is kept in the output,
    whether it is a headline or not.

    Keyword arguments:
        lines: a list of strings, one per line of the Markdown document.
        id_tag: if true, inserts the <a id> tags (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A copy of the input lines where
            <a class="mk-toclify" id="some-header"></a> anchor tags were
            inserted above the header lines (if id_tag is true) and
            "back to top" links below them (if back_links is true).

        2nd list:
            A list of 3-value sublists, where the first value
            represents the heading, the second value the string
            that is used as ID in the anchor tags,
            and the third value is an integer that represents the
            headline level.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]

    """
    heading_prefixes = ('# ', '## ', '### ', '#### ', '##### ', '###### ')

    out_contents = []
    headlines = []
    for l in lines:
        l_stripped = l.lstrip()
        leading = l[:len(l) - len(l_stripped)]

        # Decide without 'continue': a rejected candidate is still a line of
        # the document and must reach out_contents below.
        is_headline = (
            # one to six '#' followed by a space
            l_stripped.startswith(heading_prefixes)
            # headers can be indented by at most 3 spaces
            and len(leading) <= 3 and leading == ' ' * len(leading)
            # ignore empty headers
            and bool(set(l_stripped) - {'#', ' '}))

        if is_headline:
            # Pass the unindented text so the '#' run is counted correctly.
            dashified = dashifyHeadline(l_stripped)

            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    out_contents.append(
                        '<a class="mk-toclify" id="%s"></a>' % (dashified[1]))
                headlines.append(dashified)

        out_contents.append(l)
        if back_links and is_headline:
            out_contents.append('[[back to top](#table-of-contents)]')
    return out_contents, headlines
 | |
| 
 | |
def positioningHeadlines(headlines):
    """
    Moves all headlines one level up if the document has no level 1 headline.

    The level is the last item of each headline entry. The entries are
    modified in place and the list passed in is returned.
    """
    has_top_level = any(entry[-1] == 1 for entry in headlines)
    if not has_top_level:
        for entry in headlines:
            entry[-1] = entry[-1] - 1
    return headlines
 | |
| 
 | |
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Creates the lines of the table of contents from the headline list
    that was returned by the tagAndCollect function.

    Keyword Arguments:
        headlines: list of lists
            e.g., ['Some header lvl3', 'some-header-lvl3', 3]
        hyperlink: if True, each entry is a Markdown link,
            e.g., '[Some header lvl1](#some-header-lvl1)<br>';
            otherwise each entry is an indented list item,
            e.g., '    - Some header lvl2<br>'
        top_link: if True, add an id tag for linking the table
            of contents itself (for the back-to-top-links)
        no_toc_header: suppresses the title line (and the id tag) if True.

    Returns a list of lines for a table of contents; every entry ends
    with '<br>' and the list is closed by a '\\n' item.

    """
    toc = []
    if not no_toc_header:
        if top_link:
            toc.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        toc.append(contentTitle + '<br>')

    if hyperlink:
        entries = ['[%s](#%s)' % (h[0], h[1]) for h in headlines]
    else:
        entries = ['%s- %s' % ((h[2] - 1) * '    ', h[0]) for h in headlines]

    toc.extend(entry + '<br>' for entry in entries)
    toc.append('\n')
    return toc
 | |
| 
 | |
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Returns a string with the Markdown output contents incl.
    the table of contents.

    Keyword arguments:
        toc_headlines: lines for the table of contents
            as created by the createToc function.
        body: contents of the Markdown file including
            ID-anchor tags as returned by the
            tagAndCollect function.
        spacer: Adds vertical space after the table
            of contents. Height in pixels.
        placeholder: If a placeholder string is provided, the placeholder
            will be replaced by the TOC instead of inserting the TOC at
            index contentLineNdx of the document.

    If there is neither a table of contents nor a spacer to insert and no
    placeholder is given, the body is returned unchanged (joined by
    newlines).

    """
    toc_lines = list(toc_headlines)
    if spacer:
        toc_lines.append('\n<div style="height:%spx;"></div>\n' % (spacer))
    toc_markdown = "\n".join(toc_lines)

    if placeholder:
        return "\n".join(body).replace(placeholder, toc_markdown)

    # Nothing to insert: splitting and re-joining the body would add a stray
    # newline to documents with at most contentLineNdx lines, making them
    # grow on every run.
    if not toc_markdown:
        return "\n".join(body)

    head = "\n".join(body[:contentLineNdx]) + '\n'
    tail = "\n".join(body[contentLineNdx:])
    return head + toc_markdown + tail
 | |
| 
 | |
def outputMarkdown(markdown_cont, output_file):
    """
    Writes markdown_cont to the file output_file.

    Nothing is written if output_file is empty or None.

    """
    if not output_file:
        return
    with open(output_file, 'w') as target:
        target.write(markdown_cont)
 | |
| 
 | |
def markdownToclify(
    input_file,
    output_file=None,
    min_toc_len=2,
    github=False,
    back_to_top=False,
    nolink=False,
    no_toc_header=False,
    spacer=0,
    placeholder=None,
    exclude_h=None):
    """ Function to add table of contents to markdown files.

    An existing table of contents, "back to top" links and mk-toclify
    anchor tags are removed from the input before the new table of
    contents is generated.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.

      output_file: str (default: None)
        Path to the markdown output file. Nothing is written if None.

      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.

      github: bool (default: False)
        Uses GitHub TOC syntax if True (no <a id> anchor tags).

      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.

      nolink: bool (default: False)
        Creates the table of contents without internal links if True.

      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True

      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.

      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.

      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated document differs from the content of
      input_file, False otherwise.

    """
    original_lines = readLines(input_file)

    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)

        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    # Without links the anchor tags and back-to-top links have no target,
    # so the untagged contents are used.
    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines split the text on '\n', so joining restores what was read.
    return cont != '\n'.join(original_lines)
 | |
| 
 | |
def isReleaseNotes(f):
    """Tell whether the file name of path f equals releaseNotesName."""
    file_name = os.path.basename(f)
    return file_name == releaseNotesName
 | |
| 
 | |
def excludeHeadingsFor(f):
    """Return the heading levels to leave out of the table of contents of f."""
    if isReleaseNotes(f):
        return headingExcludeRelease
    return headingExcludeDefault
 | |
| 
 | |
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """Add or update table of contents in specified file.

    The new document is first written to '<input_file>.tmp'. The input
    file is only replaced if the lines of the new document differ from
    the lines of the input file; otherwise the temporary file is removed.

    Return 1 if file changed, 0 otherwise.
    """
    if verbose:
        print('file: {}'.format(input_file))

    output_file = input_file + '.tmp'

    markdownToclify(
        input_file=input_file,
        output_file=output_file,
        min_toc_len=min_toc_len,
        github=True,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=0,
        placeholder=None,
        exclude_h=excludeHeadingsFor(input_file))

    # Compare the files themselves so the result does not depend on what
    # markdownToclify returns.
    if readLines(input_file) == readLines(output_file):
        os.remove(output_file)
        return 0

    # prevent race-condition (Python 3.3):
    if sys.version_info >= (3, 3):
        os.replace(output_file, input_file)
    else:
        os.remove(input_file)
        os.rename(output_file, input_file)

    return 1
 | |
| 
 | |
def updateDocumentToC(paths, min_toc_len, verbose):
    """Add or update table of contents to specified paths.

    Each item of paths is expanded as a glob pattern; matches that are
    not regular files are skipped.

    Return number of changed files.
    """
    changed = 0
    for pattern in paths:
        files = (match for match in glob.glob(pattern) if os.path.isfile(match))
        for path in files:
            changed += updateSingleDocumentToC(
                input_file=path, min_toc_len=min_toc_len, verbose=verbose)
    return changed
 | |
| 
 | |
def updateDocumentToCMain():
    """Add or update table of contents to specified paths.

    Parses the command line, processes the given files (or
    documentsDefault if none is given) and prints a summary.
    """

    parser = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        epilog='',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs=argparse.REMAINDER,
        help='files to process, at default: docs/*.md')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    parser.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help='the minimum number of entries to create a table of contents for [{deflt}]'.format(deflt=minTocEntries))

    # Removing works by demanding more entries than any document can have.
    # sys.maxsize is used because a fixed small number such as 99 would
    # leave the table in documents with that many headings or more.
    parser.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=sys.maxsize,
        help='remove all tables of contents')

    args = parser.parse_args()

    paths = args.Input if len(args.Input) > 0 else [documentsDefault]

    changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)

    if changedFiles > 0:
        print("Processed table of contents in " + str(changedFiles) + " file(s)")
    else:
        print("No table of contents added or updated")
 | |
| 
 | |
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    updateDocumentToCMain()
 | |
| 
 | |
| # end of file
 |