From 257df10ffea317273969a00860a59007c9d0e2dc Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Wed, 12 Apr 2017 11:07:51 +0200 Subject: [PATCH] improve mkdoc.py determinism When processing many files that contain top-level items with the same name (e.g. "operator<<"), the output was non-deterministic and depended on the order in which the different Clang processes finished. This commit adds sorting that also accounts for the filename to prevent random changes from run to run. --- tools/mkdoc.py | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/tools/mkdoc.py b/tools/mkdoc.py index 400fb05da..1fd8cceed 100644 --- a/tools/mkdoc.py +++ b/tools/mkdoc.py @@ -56,26 +56,19 @@ CPP_OPERATORS = OrderedDict( job_count = cpu_count() job_semaphore = Semaphore(job_count) -registered_names = dict() - +output = [] def d(s): return s.decode('utf8') def sanitize_name(name): - global registered_names name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name) for k, v in CPP_OPERATORS.items(): name = name.replace('operator%s' % k, 'operator_%s' % v) name = re.sub('<.*>', '', name) name = ''.join([ch if ch.isalnum() else '_' for ch in name]) name = re.sub('_$', '', re.sub('_+', '_', name)) - if name in registered_names: - registered_names[name] += 1 - name += '_' + str(registered_names[name]) - else: - registered_names[name] = 1 return '__doc_' + name @@ -189,8 +182,7 @@ def process_comment(comment): return result.rstrip().lstrip('\n') -def extract(filename, node, prefix, output): - num_extracted = 0 +def extract(filename, node, prefix): if not (node.location.file is None or os.path.samefile(d(node.location.file.name), filename)): return 0 @@ -201,9 +193,7 @@ def extract(filename, node, prefix, output): sub_prefix += '_' sub_prefix += d(node.spelling) for i in node.get_children(): - num_extracted += extract(filename, i, sub_prefix, output) - if num_extracted == 0: - return 0 + extract(filename, i, sub_prefix) if node.kind in PRINT_LIST: comment = d(node.raw_comment) if node.raw_comment is not None else '' comment = process_comment(comment) @@ -212,18 +202,15 @@ def extract(filename, node, prefix, output): sub_prefix += '_' if len(node.spelling) > 0: name = sanitize_name(sub_prefix + d(node.spelling)) - output.append('\nstatic const char *%s =%sR"doc(%s)doc";' % - (name, '\n' if '\n' in comment else ' ', comment)) - num_extracted += 1 - return num_extracted + global output + output.append((name, filename, comment)) class ExtractionThread(Thread): - def __init__(self, filename, parameters, output): + def __init__(self, filename, parameters): Thread.__init__(self) self.filename = filename self.parameters = parameters - self.output = output job_semaphore.acquire() def run(self): @@ -232,7 +219,7 @@ class ExtractionThread(Thread): index = cindex.Index( cindex.conf.lib.clang_createIndex(False, True)) tu = index.parse(self.filename, self.parameters) - extract(self.filename, tu.cursor, '', self.output) + extract(self.filename, tu.cursor, '') finally: job_semaphore.release() @@ -289,18 +276,26 @@ if __name__ == '__main__': #endif ''') - output = [] + output.clear() for filename in filenames: - thr = ExtractionThread(filename, parameters, output) + thr = ExtractionThread(filename, parameters) thr.start() print('Waiting for jobs to finish ..', file=sys.stderr) for i in range(job_count): job_semaphore.acquire() - output.sort() - for l in output: - print(l) + name_ctr = 1 + name_prev = None + for name, _, comment in list(sorted(output, key=lambda x: (x[0], x[1]))): + if name == name_prev: + name_ctr += 1 + name = name + "_%i" % name_ctr + else: + name_prev = name + name_ctr = 1 + print('\nstatic const char *%s =%sR"doc(%s)doc";' % + (name, '\n' if '\n' in comment else ' ', comment)) print(''' #if defined(__GNUG__)