You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

359 lines
13 KiB

#!/usr/bin/env python3
"""Migrate TiddlyWiki5 WikiText files to Markdown files.
This script creates a .md + .md.meta file for every given .tid file.
System tiddlers and other special tiddlers are skipped because they
don't work in TiddlyWiki when migrated to Markdown. Tiddlers using
macros get converted unlike tiddlers defining macros. Tiddlers
containing tables are skipped by default because Markdown tables are
very limited. If your tables are simple anyway or you just don't care
about losing table features (two header rows, captions, cell alignment,
cell merging, ...), just use the --tables flag.
"""
__version__ = '0.1'
__author__ = 'Max Schillinger'
__email__ = 'maxschillinger@web.de'
import os
import re
import argparse
import urllib.parse
from pathlib import Path
from typing import TextIO
# Header lines identifying special tiddlers that must not be migrated.
re_special_tag = re.compile(r'^tags:.*\$:/tags/')
re_special_title = re.compile(r'^title: ?\$:/')
# [[text|target]] -- neither part may contain ']' so that a preceding
# [[plain link]] on the same line is not swallowed into the text part.
re_external_link = re.compile(r'\[\[([^|\]]+)\|([^\]]+)\]\]')
# [[Another tiddler]]
re_internal_link = re.compile(r'\[\[([^|]+?)\]\]')
# [img[file]] (canonical form) as well as [img [file]]
re_image = re.compile(r'\[img\s*\[([^]]+?)\]\]')
# Bare URL that is not already wrapped in (...) or "..."
re_url = re.compile(r'(^|[^("])(https?://[\w#/@:._?%=&+~-]+)($|[^)"])')
# Inline formatting; each pattern must start the line or follow
# whitespace (or, for superscript, any non-word character).
re_bold = re.compile(r"(\s|^)''([^']+)''")
re_italic = re.compile(r"(\s|^)//([^/]+)//")
re_bold_italic = re.compile(r"(\s|^)''//([^/']+)//''")
re_italic_bold = re.compile(r"(\s|^)//''([^/']+)''//")
re_underscore = re.compile(r"(\s|^)__([^_]+)__")
re_superscript = re.compile(r"(\W|^)\^\^([^^]+)\^\^")
re_subscript = re.compile(r"(\s|^),,([^,]+),,")
# ;term  (definition list term)
re_definition = re.compile(r'^; *([^ ].*)$')
re_whitespace_only = re.compile(r'^[ \t]*\n$')
# Table row
re_table = re.compile(r'^\|')
# Separator cell with fewer than three dashes
re_separator_cell = re.compile(r'\|-{0,2}\|')
class style:
    """ANSI escape sequences used to colorize terminal output."""

    # Foreground colors: SGR codes 31 (red) through 34 (blue).
    RED, GREEN, YELLOW, BLUE = (f'\033[{code}m' for code in range(31, 35))
    # SGR code 0 switches all attributes back to the terminal default.
    RESET = '\033[0m'
def error(text: str):
    """Print *text* in red.

    Besides strings, any object with a string representation is
    accepted (e.g. an exception instance), so callers may pass a caught
    exception directly.
    """
    print(f'{style.RED}{text}{style.RESET}')
def warning(text: str):
    """Print *text* in yellow."""
    colored = ''.join((style.YELLOW, text, style.RESET))
    print(colored)
def info(text: str):
    """Print *text* in blue."""
    colored = ''.join((style.BLUE, text, style.RESET))
    print(colored)
def write_meta_file(lines: list, meta_file: Path) -> int:
    """Write the header fields of a tiddler to *meta_file*.

    A header is only recognized when the first line starts with one of
    the well-known field names. All consecutive leading lines of the
    form ``name:`` are copied to the meta file; the ``type`` field is
    replaced by (or, if missing, added as) ``text/x-markdown``.

    Args:
        lines: All lines of the .tid file, including line breaks.
        meta_file: Path of the .md.meta file to create.

    Returns:
        Index of the first content line in *lines*. This is 0 (and no
        file is written) when *lines* is empty or has no header, and
        ``len(lines)`` when the file consists of a header only.
    """
    known_fields = ('caption', 'created', 'modified', 'tags', 'title', 'type')
    if not lines or lines[0].split(':', 1)[0] not in known_fields:
        # empty file or no header found
        return 0
    type_defined = False
    with open(meta_file, 'w') as f:
        for index, line in enumerate(lines):
            if not re.match(r'[a-z]+:', line):
                # first line that is not a header field
                break
            if line.startswith('type:'):
                line = 'type: text/x-markdown\n'
                type_defined = True
            elif not line.endswith('\n'):
                # last line of a file without trailing line break
                line += '\n'
            f.write(line)
        else:
            # The header runs to the end of the file: there is no content.
            index = len(lines)
        if not type_defined:
            # add type property
            f.write('type: text/x-markdown\n')
    # TODO: Write meta file (with at least `type:`) when .tid file
    # doesn't contain a header?
    # skip empty line after header
    if index < len(lines) and lines[index] == '\n':
        index += 1
    return index
def write(f: TextIO, line: str, quoted: bool = False):
    """Write *line* to *f*, prefixed with '> ' when *quoted* is true."""
    f.write(f'> {line}' if quoted else line)
def write_markdown_file(lines: list, md_file: Path) -> bool:
    """Convert WikiText lines to Markdown and write them to *md_file*.

    The conversion works line by line: block quotes, code blocks,
    images, ``[[text|url]]`` links, plain URLs, inline formatting,
    lists, headers, definition terms and tables are rewritten; anything
    else is copied unchanged. The name of the output file is printed
    before writing starts.

    Args:
        lines: Content lines of a tiddler, each including its line break.
        md_file: Path of the Markdown file to create or overwrite.

    Returns:
        True on success, False when an IOError occurred (the error is
        reported via error()).
    """
    try:
        print(md_file.name)
        with open(md_file, 'w') as f:
            codeblock = False
            blockquote = False
            table = False
            for line in lines:
                # block quote: a '<<<' line opens or closes a quote. The
                # marker line is dropped; write() adds the '> ' prefix
                # to every line written while the quote is open.
                if line.startswith('<<<'):
                    blockquote = not blockquote
                    continue
                # code blocks: fences and everything between them are
                # copied verbatim
                if line.startswith('```'):
                    codeblock = not codeblock
                    write(f, line, blockquote)
                    continue
                if codeblock:
                    write(f, line, blockquote)
                    continue
                # images
                line = re_image.sub(r'<img src="\1">', line)
                # links
                # [[text|url]]
                line = re_external_link.sub(r'[\1](\2)', line)
                # NOTE: [[Another tiddler]] links are currently not
                # converted (see re_internal_link).
                # plain url
                line = re_url.sub(r'\1<\2>\3', line)
                # bold italic text: must be handled before plain bold
                # and plain italic, which would otherwise consume the
                # outer or inner markers and leave the rest unconverted
                line = re_bold_italic.sub(r'\1**_\2_**', line)
                line = re_italic_bold.sub(r'\1_**\2**_', line)
                # bold text
                line = re_bold.sub(r'\1**\2**', line)
                # italic text
                line = re_italic.sub(r'\1_\2_', line)
                # escape * inside words
                line = re.sub(r'([\w\d])\*([\w\d])', r'\1\*\2', line)
                # underscored text
                line = re_underscore.sub(r'\1<u>\2</u>', line)
                # superscripted text
                line = re_superscript.sub(r'\1<sup>\2</sup>', line)
                # subscripted text
                line = re_subscript.sub(r'\1<sub>\2</sub>', line)
                # list
                m = re.match(r'^([*#]+) ', line)
                if m:
                    markers = m.group(1)
                    indentation_level = len(markers) - 1
                    final_marker = '1.' if markers[-1] == '#' else '*'
                    line = (
                        ' ' * 4 * indentation_level
                        + final_marker
                        + line[len(markers):]
                    )
                # headers (need to be migrated after lists because a Markdown
                # header looks like a WikiText numbered list item)
                i = 0
                while i < len(line) and line[i] == '!':
                    i += 1
                if i > 0:
                    line = line.replace('!', '#', i)
                    # Markdown needs a space between '#' and the title
                    line = re.sub(r'^(#+)([^#\n ])', r'\1 \2', line)
                # definitions ;xxx :yyy
                line = re_definition.sub(r'**\1**', line)
                # https://www.markdownguide.org/extended-syntax/#definition-lists
                # Not supported by pulldown-cmark or md4c.
                # https://github.com/Jermolene/TiddlyWiki5/pull/6528
                # supports it
                # tables
                if line.startswith('|'):
                    if not table:
                        # first row of table
                        if line.endswith('h\n'):
                            # header row: drop the trailing 'h' marker
                            line = line[:-2] + '\n'
                            # empty header cells should contain at least
                            # one space (depends on Markdown parser)
                            while '||' in line:
                                line = line.replace('||', '| |')
                            write(f, line, blockquote)
                            # insert separator line
                            separator = re.sub(r'[^|\n]', '-', line)
                            # center align all columns, use at least
                            # three dashes per cell ('|:---:|')
                            while re_separator_cell.search(separator):
                                separator = re_separator_cell.sub('|:---:|',
                                                                  separator)
                            while re.search(r'\|--+-\|', separator):
                                separator = re.sub(r'\|-(-+)-\|', r'|:\1:|',
                                                   separator)
                            write(f, separator, blockquote)
                        else:
                            # no header row: insert dummy header row
                            header = re.sub(r'[^|\n]', ' ', line)
                            while '||' in header:
                                header = header.replace('||', '| |')
                            write(f, header, blockquote)
                            # separator cells need at least three dashes,
                            # also for empty or very short cells
                            separator = re.sub(r'[^|\n]', '-', line)
                            separator = re.sub(r'(?<=\|)-{0,2}(?=\|)', '---',
                                               separator)
                            write(f, separator, blockquote)
                            write(f, line, blockquote)
                        table = True
                        continue
                elif table:
                    # first line after a table
                    if not re_whitespace_only.match(line):
                        # insert empty line
                        write(f, '\n', blockquote)
                    table = False
                write(f, line, blockquote)
    except IOError as err:
        error(str(err))
        return False
    return True
def migrate_tid_file(
        tid_file: Path = None,
        update: bool = False,
        output_directory: Path = None,
        tables: bool = False) -> bool:
    """Migrate a single .tid file to a .md file plus a .md.meta file.

    Args:
        tid_file: The WikiText file to migrate.
        update: Skip the migration when the Markdown file already
            exists and is newer than *tid_file*.
        output_directory: Directory for the output files. Defaults to
            the directory of *tid_file*.
        tables: Also migrate tiddlers that contain tables.

    Returns:
        True if the tiddler was migrated, False if it was skipped or
        an error occurred.
    """
    if output_directory:
        meta_file = output_directory / tid_file.name
    else:
        meta_file = tid_file
    meta_file = meta_file.with_suffix('.md.meta')
    md_file = meta_file.parent / meta_file.stem  # rstrip .meta
    if (update and md_file.exists()
            and os.path.getmtime(md_file) > os.path.getmtime(tid_file)):
        # keep the existing markdown file because it's newer than
        # the .tid file
        return False
    try:
        with open(tid_file) as f:
            lines = f.readlines()
    except (IOError, UnicodeDecodeError) as err:
        # An unreadable file must not abort the whole batch.
        error(str(err))
        return False
    if not lines:
        warning(f"'{tid_file.name}' is empty. Skipping it.")
        return False
    if lines[0].startswith('color:'):
        warning(f"'{tid_file.name}' is a tag/color tiddler. Skipping it.")
        return False
    # Only the first lines are inspected for header fields.
    head = lines[:10]
    if any(re_special_tag.match(line) for line in head):
        # special tiddler (like a macro or table of contents) → skip
        warning(f"'{tid_file.name}' is tagged as a special tiddler. "
                "Skipping it.")
        return False
    if any(re_special_title.match(line) for line in head):
        # special tiddler (title starts with $:/) → skip
        warning(f"'{tid_file.name}' has a special title. "
                "Skipping it.")
        return False
    if tables is False and any(re_table.match(line) for line in lines):
        warning(f"'{tid_file.name}' contains a table. Skipping it.")
        return False
    index_content = write_meta_file(lines, meta_file)
    result = write_markdown_file(lines[index_content:], md_file)
    if result is False:
        # Remove partial output. Either file may be missing: the meta
        # file is not written for tiddlers without a header, and the
        # Markdown file is missing when it could not be opened.
        for leftover in (meta_file, md_file):
            try:
                leftover.unlink()
            except FileNotFoundError:
                pass
    return result
def main(tid_files: list = None, update: bool = False,
         delete_input: bool = False, output_directory: Path = None,
         tables: bool = False):
    """Migrate all given .tid files and print a summary.

    Args:
        tid_files: Paths of the .tid files to migrate. None is treated
            like an empty list.
        update: Passed on to migrate_tid_file().
        delete_input: Delete each .tid file after it has been migrated.
        output_directory: Passed on to migrate_tid_file().
        tables: Passed on to migrate_tid_file().
    """
    skip_count = 0
    migrate_count = 0
    os.system("")  # Enables color output on Windows 10 (not tested)
    for tid_file in tid_files or ():
        if tid_file.name.startswith('$__'):
            warning(f"'{tid_file.name}' looks like a system tiddler. "
                    "Skipping it.")
            skip_count += 1
            continue
        if migrate_tid_file(tid_file, update, output_directory, tables):
            # TODO: Delete if migration was skipped because of existing
            # markdown file?
            if delete_input:
                tid_file.unlink()
            migrate_count += 1
        else:
            skip_count += 1
    info(f"\n{migrate_count} tiddlers migrated to Markdown.")
    if skip_count:
        info(f"{skip_count} tiddlers skipped.")
if __name__ == '__main__':
    # Command line entry point: parse the arguments and run the migration.
    cli = argparse.ArgumentParser(epilog=__doc__)
    # Boolean switches, registered in the order they appear in --help.
    for short_flag, long_flag, help_text in (
            ("-u", "--update",
             "Migrate only when output file doesn't yet exist "
             "or when it's older than the tid file."),
            ("-d", "--delete",
             "Delete input files after migration."),
            ("-t", "--tables",
             "Include tiddlers containing tables."),
    ):
        cli.add_argument(short_flag, long_flag, action="store_true",
                         help=help_text)
    cli.add_argument("-o", "--output-directory", type=Path, default=None,
                     help="Write markdown files in this directory.")
    cli.add_argument("files", nargs='+', type=Path,
                     help=".tid files to migrate to Markdown.")
    options = cli.parse_args()
    main(
        tid_files=options.files,
        update=options.update,
        delete_input=options.delete,
        output_directory=options.output_directory,
        tables=options.tables,
    )