-
Notifications
You must be signed in to change notification settings - Fork 268
/
update-toc.py
198 lines (155 loc) · 7.36 KB
/
update-toc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# This script is intended for use in intermediate doc repos generated from docs.ms CI.
# Given a reference ToC and a set of namespaces, limit the reference to ToC entries that contain
# namespaces in our set.
import argparse
import os
import fnmatch
import re
import json
import xml.etree.ElementTree as ET
# By default, yaml does not preserve the insertion order of dicts.
# Because this script generates TABLE OF CONTENTS values,
# preserving that order is important.
# The drop-in replacement oyaml takes care of this for us.
import oyaml as yaml
MONIKER_REPLACEMENTS = ['{moniker}','<moniker>']
class PathResolver:
    """Rewrite ToC ``href`` values against a documentation repository on disk.

    The doc builds can reference readmes from external ("dependent")
    repositories, which only resolve during publishing. Hrefs that start with
    one of those repository roots therefore cannot be checked for existence
    and are always left exactly as they are.
    """

    def __init__(self, doc_repo_location=None, moniker=""):
        """Store the settings and load the dependent-repository href prefixes.

        Args:
            doc_repo_location: Root directory of the documentation repository.
                When falsy, ``amend_href`` leaves every entry untouched.
            moniker: Value substituted for the moniker placeholders found in
                hrefs. Passing ``None`` disables the substitution.
        """
        self.excluded_href_paths = []
        self.target_moniker = moniker
        self.doc_repo_location = doc_repo_location

        if self.doc_repo_location:
            self.excluded_href_paths = self.get_non_standard_hrefs(self.doc_repo_location)

    def amend_href(self, toc_dict):
        """Point ``toc_dict["href"]`` at an existing readme, or drop the href.

        The doc builds can reference readmes from external repos (they resolve
        during publishing), so href values cannot simply be checked for
        existence. If an href STARTS with one of the dependent repositories,
        then it is left exactly as is.

        Otherwise the moniker placeholders are substituted and the file is
        looked up on disk with a ``.md`` extension. When it exists the href is
        updated; when it does not, the href is removed and the entry is marked
        with ``landingPageType: Service``.

        Args:
            toc_dict: A ToC entry containing an ``href`` key. It is modified
                in place.

        Returns:
            The same ``toc_dict`` object.
        """
        if not self.doc_repo_location:
            return toc_dict

        input_string = toc_dict["href"]

        # External readmes are never resolved to a different file.
        # str.startswith accepts a tuple; an empty tuple never matches.
        if input_string.startswith(tuple(self.excluded_href_paths)):
            return toc_dict

        # Build a path that resolves on disk, without the docs-specific "~/" marker.
        resolvable_path = os.path.normpath(
            os.path.join(self.doc_repo_location, input_string.replace("~/", ""))
        )

        # Apply moniker folder adjustments if necessary.
        if self.target_moniker is not None:
            for replacement in MONIKER_REPLACEMENTS:
                # input_string keeps the leading "~/" that docs needs; only the
                # moniker folder is updated.
                input_string = input_string.replace(replacement, self.target_moniker)

                # resolvable_path gets the same substitution so that the
                # existence check below looks in the moniker folder.
                resolvable_path = resolvable_path.replace(replacement, self.target_moniker)

        possible_target_readme = os.path.splitext(resolvable_path)[0] + ".md"

        if os.path.exists(possible_target_readme):
            toc_dict["href"] = input_string
        else:
            toc_dict.pop("href")
            toc_dict["landingPageType"] = "Service"

        return toc_dict

    def get_non_standard_hrefs(self, doc_repo_location):
        """Return the href prefixes that belong to dependent repositories.

        Reads ``.openpublishing.publish.config.json`` from the root of
        ``doc_repo_location`` and returns one ``~/<path_to_root>`` prefix per
        entry under ``dependent_repositories``. A config without that key (or
        with a null value) yields an empty list.

        Args:
            doc_repo_location: Root directory of the documentation repository.

        Returns:
            A list of href prefixes, in the order they appear in the config.
        """
        target = os.path.join(doc_repo_location, ".openpublishing.publish.config.json")

        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(target, "r", encoding="utf-8") as f:
            data = json.load(f)

        dependent_repos = data.get("dependent_repositories") or []

        return ["~/{}".format(repo["path_to_root"]) for repo in dependent_repos]
def filter_children(targeted_ns_list, known_namespaces):
    """Return the entries of ``targeted_ns_list`` found in ``known_namespaces``.

    The original order of ``targeted_ns_list`` is kept.
    """
    # Matching is by exact membership only. Pattern-style entries such as
    # "azure-eventhubs*" are not expanded here and still need handling.
    return [candidate for candidate in targeted_ns_list if candidate in known_namespaces]
# a post-order recursive function that returns a modified reference.yml
# based on the set of namespaces that we've grabbed from autogenerated ToC.yml
def filter_toc(toc_dict, namespaces, path_resolver):
    """Prune a reference ToC node, returning the node or ``None`` if it is dropped.

    Children are processed before the node itself (post-order). The node is
    modified in place.

    Args:
        toc_dict: A ToC node (dict), or ``None``.
        namespaces: The namespaces known to exist in the target.
        path_resolver: Object whose ``amend_href`` is applied to nodes that
            carry an ``href``.

    Returns:
        The pruned node, or ``None`` when nothing of it survives.
    """
    if toc_dict is None:
        return None

    # Internal node: prune every child first and keep only the survivors.
    if "items" in toc_dict:
        pruned = (filter_toc(child, namespaces, path_resolver) for child in toc_dict["items"])
        survivors = [node for node in pruned if node]

        # An internal node with no surviving children disappears as well.
        if not survivors:
            return None
        toc_dict["items"] = survivors

    # The resolver may rewrite the href or remove it altogether.
    if "href" in toc_dict:
        toc_dict = path_resolver.amend_href(toc_dict)

    # Leaf node: it lives only while at least one of its children is known.
    # The node's own "children" list is left as it was.
    if "children" in toc_dict:
        if not filter_children(toc_dict["children"], namespaces):
            return None
        return toc_dict

    # Without children, a node needs an href or items to be worth keeping.
    if "href" in toc_dict or "items" in toc_dict:
        return toc_dict
    return None
def grep_children_namespaces(autogenerated_toc_xml):
    """Return the namespace names found in the namespaces XML, plus ``'**'``.

    NOTE(review): ``autogenerated_toc_xml`` is not used. The file is parsed
    again from the module-level ``args.namespaces``, so this only works after
    the script's argument parsing has run.
    """
    root = ET.parse(args.namespaces).getroot()

    names = []
    # The first child of the root is skipped; only 'Namespace' elements count.
    for node in root[1:]:
        if node.tag == 'Namespace':
            names.append(node.attrib['Name'])

    # '**' is always added as a known entry.
    names.append('**')
    return names
if __name__ == "__main__":
    # Script entry point: read the reference ToC, keep only the entries whose
    # namespaces exist in the target's autogenerated output, and write the
    # result to the target ToC.
    parser = argparse.ArgumentParser(
        description="""
        Combines a reference and target ToC. The new target ToC mirrors the reference, omitting ToC
        entries that are NOT present in the preview output.
        """
    )

    parser.add_argument("-r", "--reference", help="The source ToC.yml", required=True)

    parser.add_argument("-t", "--target", help="The target ToC.yml", required=True)

    parser.add_argument(
        "-n",
        "--namespaces",
        help="The ToC.yml where target autogenerated documentation exists",
        required=True,
    )

    parser.add_argument(
        "-d",
        "--docrepo",
        help="The root directory of the target documentation repository.",
        required=True,
    )

    parser.add_argument(
        "-m",
        "--moniker",
        help="Selected moniker. Used when filling in moniker-folder path updates.",
        default="",
        required=False,
    )

    args = parser.parse_args()

    try:
        target_autogenerated_toc = ET.parse(args.namespaces).getroot()[0]
    except Exception as error:
        print(
            "Execution requires the known namespaces yml be defined. Please check if the target xml has assembly tags."
        )
        print("Error: {}".format(error))
        # Nothing below can work without the namespaces file, so stop here
        # instead of failing later with an unrelated NameError.
        raise SystemExit(1)

    try:
        with open(args.reference, "r") as reference_yml:
            base_reference_toc = yaml.safe_load(reference_yml)
    except Exception as error:
        print(
            "Execution requires the known reference yml be defined."
        )
        print("Error: {}".format(error))
        raise SystemExit(1)

    # An empty reference file loads as None (or an empty list); there is no
    # root entry to filter in that case.
    if not base_reference_toc:
        print("The reference yml does not contain any ToC entries.")
        raise SystemExit(1)

    present_in_target = grep_children_namespaces(target_autogenerated_toc)

    print(
        "Here are the visible namespaces in target autogenerated ToC. Constraining reference.yml."
    )
    for ns in sorted(present_in_target):
        print(" |__ " + ns)

    path_resolver = PathResolver(doc_repo_location=args.docrepo, moniker=args.moniker)

    # Only the first top-level entry of the reference ToC is filtered.
    base_reference_toc[0] = filter_toc(base_reference_toc[0], present_in_target, path_resolver)

    updated_content = yaml.dump(base_reference_toc, default_flow_style=False)

    with open(args.target, "w") as f:
        f.write(updated_content)