references.py

"""
The intent here is to:
1. Extract all links (including other references) and markdown filenames/titles.
2. Save the graph of references in a JSON file.
3. Use javascript (?) to append external references on pages from the JSON.
"""
import json
import pprint
import re
from pathlib import Path

# NOTE: this targets the legacy mistune 0.x API (mistune.Renderer /
# mistune.InlineLexer); later mistune releases changed these interfaces.
import mistune
# The pygments imports are not used below (HighlightRenderer does not override
# block_code); presumably left over from an earlier syntax-highlighting setup.
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import html

pp = pprint.PrettyPrinter(width=41, compact=True)

def slugify(s):
    return s.lower().replace(" ", "-").replace("_", "-")


def fname_to_slug(s):
    # Drop the trailing ".md" before slugifying.
    return slugify(s[:-3])
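# For example (illustrative filenames):
#   slugify("Getting Started")          -> "getting-started"
#   fname_to_slug("Getting_Started.md") -> "getting-started"
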
# Maps a page slug to the pages that refer to it, flattened at the end into
# something like {'operations': ['index'], ...}.
refs = {}   # inbound references by page
paths = {}  # mapping of page slugs to URL paths
links = {}  # outbound links by page

class HighlightRenderer(mistune.Renderer):
    def set_file_slug(self, fname):
        self.file_slug = fname_to_slug(fname)
        links[self.file_slug] = {
            'outbound': []
        }

    def link(self, link, title, content):
        # Record internal links only: skip absolute http(s) URLs,
        # in-page anchors and mailto links.
        if hasattr(self, 'file_slug'):
            if not link.startswith('http://') and \
               not link.startswith('https://') and \
               not link.startswith("#") and \
               not link.startswith('mailto:'):
                links[self.file_slug]['outbound'].append(link)
        return super().link(link, title, content)

    def wiki_link(self, link, text):
        # Wiki links are detected by the inline lexer, but this script only
        # collects standard markdown links, so just emit a placeholder.
        return "Wiki link detected, but this renderer is not used."
class WikiLinkInlineLexer(mistune.InlineLexer):
    def enable_wiki_link(self):
        # Add the wiki_link rule.
        self.rules.wiki_link = re.compile(
            r'\[\['        # [[
            r'([\s\S]+?)'  # Page 2|Page 2
            r'\]\](?!\])'  # ]]
        )
        # Register the wiki_link parser in the default rules. The insertion
        # position matters, and 3 may not be ideal.
        self.default_rules.insert(3, 'wiki_link')

    def output_wiki_link(self, m):
        text = m.group(1)
        alt, link = text.split('|')
        # Delegate to a custom renderer method; this could also return
        # the HTML directly.
        return self.renderer.wiki_link(alt, link)
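# For example, [[Page 2|page-2]] is captured as "Page 2|page-2", split into
# alt "Page 2" and link "page-2", and handed to the renderer's wiki_link method.
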
renderer = HighlightRenderer()
inline = WikiLinkInlineLexer(renderer)
inline.enable_wiki_link()
markdown = mistune.Markdown(renderer=renderer, inline=inline)

# Quick smoke test: one regular link and one wiki link.
print(markdown('''
[abc](https://google.com)
[[def|ghi]]
'''))
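# The rendered HTML should contain an <a href="https://google.com"> anchor for
# the first line and the wiki-link placeholder text for the second.
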
for f in list(Path(".").rglob("*.[mM][dD]")):
    md = open(f).read()
    web_path = str(f)[:-3]
    node = fname_to_slug(f.name)

    # Do not include README files in this generator.
    if node == 'readme':
        continue
    else:
        print("NODE:", node)

    if node in paths:
        raise Exception(
            "Duplicate node detected: {} != {}. Never give two markdown files "
            "the same name, as filenames are used as node IDs for "
            "bidirectional links.".format(web_path, paths[node])
        )

    # If the path ends with 'index', drop that last token: index files are the
    # default, root file for their directory.
    if web_path.endswith('index'):
        web_path = web_path[:-5]
    paths[node] = web_path

    markdown.renderer.set_file_slug(f.name)
    markdown(md)
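# For instance (hypothetical files), "docs/operations.md" yields node
# "operations" with path "docs/operations", while "docs/index.md" yields node
# "index" with path "docs/".
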
for k, v in links.items():
    for target in v['outbound']:
        # The last path component of the outbound link is the target's slug.
        target_slug = slugify(target.strip("/").split("/")[-1])
        if target_slug not in links:
            # Exclude refs that do not resolve to a known markdown file.
            print('no source file found for slug: {}, referred to by {}'.format(target_slug, k))
        else:
            if target_slug not in refs:
                refs[target_slug] = {}
            refs[target_slug][k] = True

# Unresolved targets are already excluded above. TODO: remove this.
# for k in list(refs.keys()):
#     if k not in links.keys():
#         # exclude refs that do not resolve.
#         del refs[k]

print("INBOUND LINKS:")
refs = {k:list(v.keys()) for k,v in refs.items()}
output = {
'refs': refs,
'paths': paths
}
pp.pprint(output)
open('references.json','w').write(json.dumps(output))
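
# A minimal sketch of step 3 from the docstring, assuming references.json is
# available to the consumer and "operations" is a hypothetical page slug; the
# JavaScript on each page would do the equivalent of:
#
#   data = json.load(open('references.json'))
#   for referrer in data['refs'].get('operations', []):
#       print(referrer, '->', data['paths'][referrer])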