#!/usr/bin/env python3
import os
import sys
import markdown
from mdx_gfm import GithubFlavoredMarkdownExtension
import weasyprint
import re
import logging
# Log to stderr as "LEVEL: message", showing INFO and above.
logging.basicConfig(
  stream=sys.stderr,
  level=logging.INFO,
  format='%(levelname)s: %(message)s',
)
LOGGER = logging.getLogger('preprocess')
# sort the file order
def sort_func(x):
  """Return the sort key for the markdown file path ``x``.

  - Paths without a ``doc`` directory component all get the key ``'z'``.
  - Paths containing ``README.md`` get everything before that name plus
    ``'0'``, so a readme sorts ahead of sibling files whose names compare
    greater than ``'0'``.
  - Any other path is its own key.
  """
  doc_component = os.path.sep + 'doc' + os.path.sep
  if doc_component not in x:
    # not under a doc directory: push to the end (like an appendix)
    return 'z'

  readme_pos = x.find('README.md')
  if readme_pos == -1:
    return x

  # readmes go at the start of their section
  return x[:readme_pos] + '0'
# make the links work in-pdf
def fix_links(match):
  """Rewrite one markdown link so it works inside the combined document.

  Meant to be used as the replacement callback of ``re.sub``. For image
  links and relative paths it reads the module-level ``my_file`` (the path
  of the markdown file currently being processed).

  Args:
    match: regex match where group 1 is the link text and group 2 is the
      link target.

  Returns:
    The replacement text for the whole ``[text](target)`` link:
    - ``.png`` targets are re-based onto the directory of ``my_file``;
    - targets under ``BASE_URL`` become in-document anchors built from the
      repository path, with ``/`` replaced by ``__``;
    - any other target starting with ``http`` is returned unchanged;
    - everything else is treated as a path relative to ``my_file`` and
      becomes an anchor built from its path relative to the current
      working directory.
  """
  label = match.group(1)
  target = match.group(2)

  # images
  if os.path.splitext(target)[-1] == '.png':
    return '[%s](%s)' % (
      label,
      os.path.join(os.path.split(my_file)[0], target)
    )

  # urls to other files
  BASE_URL = 'https://github.com/tildearrow/furnace/tree/master/'
  if target.startswith(BASE_URL):
    file_path = target[len(BASE_URL):]
    if os.path.splitext(file_path)[-1] == '':
      # No extension means a directory link, so point at its readme.
      # Strip any trailing slash first; otherwise "dir/" would produce
      # "dir//README.md" and an anchor that differs from the one for "dir".
      file_path = file_path.rstrip('/') + '/README.md'
    return '[%s](#%s)' % (
      label,
      file_path.replace('/', '__')
    )

  # preserve external urls
  if target.startswith('http'):
    return match.group(0)

  # fix paths
  act_path = os.path.split(my_file)[0] + '/' + target
  act_path = os.path.relpath(os.path.abspath(act_path))
  return '[%s](#%s)' % (
    label,
    act_path.replace(os.path.sep, '__')
  )
def fix_headings(match):
  """Return the captured run of ``#`` characters with one more ``#`` added.

  Used as a ``re.sub`` replacement callback: group 1 of ``match`` holds the
  heading marker, and the result pushes that heading one level deeper.
  Only group 1 is kept, so anything else the pattern matched is dropped.
  """
  hashes = match.group(1)
  return '{}#'.format(hashes)
if __name__ == "__main__":
  #-- first, prepare the file list --#
  file_list = []
  for i in os.walk('../../doc'):
    base_dir, subfolders, files = i
    for file_ in filter(lambda x: x.lower().endswith('.md'), files):
      file_list.append(os.path.join(base_dir, file_))
  #-- then, create the document --#
  html = ''
  # perform sort
  file_list.sort(key=sort_func)
  for my_file in file_list:
    with open(my_file, 'r') as md:
      LOGGER.info("processing file %s" % my_file)
      data = md.read()
    
    # perform link fixing
    data = re.sub(r'\[(.+?)\]\((.+?)\)', fix_links, data)
    data = re.sub(r'^\s*(#+)', fix_headings, data, flags=re.MULTILINE)
    
    # each file is its own section
    html +='
