forked from PaddlePaddle/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen_doc.py
executable file
·1257 lines (1139 loc) · 44.5 KB
/
gen_doc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import paddle
import os
import shutil
import time
import pkgutil
import types
import contextlib
import argparse
import json
import sys
import inspect
import ast
import logging
import importlib
import re
import subprocess
import multiprocessing
import platform
import extract_api_from_docs
"""
Generate api_info_dict.json, which describes all the collected information about the APIs.
"""
# Filename suffixes of the English and Chinese reST documents.
en_suffix = "_en.rst"
cn_suffix = "_cn.rst"
# Files holding one API name per line; read by set_display_attr_of_apis().
NOT_DISPLAY_DOC_LIST_FILENAME = "./not_display_doc_list"
DISPLAY_DOC_LIST_FILENAME = "./display_doc_list"
CALLED_APIS_IN_THE_DOCS = './called_apis_from_docs.json' # in the guides and tutorials documents
# Directory that receives the sample codes extracted from the docstrings.
SAMPLECODE_TEMPDIR = './sample-codes'
# NOTE(review): the three device settings below are not referenced in the
# part of the file reviewed here; presumably they drive sample-code
# execution -- confirm against their users.
RUN_ON_DEVICE = "cpu"
EQUIPPED_DEVICES = set(['cpu'])
GPU_ID = 0
# key = id(api), value = dict of api_info{
# "id":id,
# "all_names":[], # all full_names
# "full_name":"", # the real name, and the others are the alias name
# "short_name":"", # without module name
# "module_name":"", # the module of the real api belongs to
# "display":True/False, # consider the not_display_doc_list and the display_doc_list
# "has_overwrited_doc":True/False #
# "doc_filename" # document filename without suffix
# "suggested_name":"", # the shortest name in all_names
# }
# Registry of collected apis, keyed by id(api object).
api_info_dict = {}
# Modules already handled by parse_module_file().
parsed_mods = {}
# Filled by collect_referenced_from_infos().
referenced_from_apis_dict = {}
referenced_from_file_titles = {}

logger = logging.getLogger()
if not logger.handlers:
    # no handler installed yet: attach a stream handler of our own
    logger.addHandler(logging.StreamHandler())
# we assume the first handler is the one we want to configure
console = logger.handlers[0]
console.setFormatter(
    logging.Formatter(
        "%(asctime)s - %(funcName)s:%(lineno)d - %(levelname)s - %(message)s"))
# step 1: walkthrough the paddle package to collect all the apis in api_set
def get_all_api(root_path='paddle', attr="__all__"):
    """
    Walk the submodules of the paddle package and register the apis they
    export through the attribute named ``attr``.

    Only modules that are already present in ``sys.modules`` are processed;
    any other module name is merely evaluated and then skipped.

    Args:
        root_path(str): not used by the body.
        attr(str): name of the module attribute that lists the api names.
    """
    global api_info_dict
    total = 0
    walker = pkgutil.walk_packages(
        path=paddle.__path__, prefix=paddle.__name__ + '.')
    for _finder, name, _ispkg in walker:
        if name not in sys.modules:
            # importlib.import_module(name)
            try:
                eval(name)
            except AttributeError:
                logger.warning("AttributeError occurred when `eval(%s)`", name)
            continue
        total += process_module(sys.modules[name], attr)

    # the top-level package itself is not yielded by walk_packages
    total += process_module(paddle, attr)

    logger.info('%s: collected %d apis, %d distinct apis.', attr, total,
                len(api_info_dict))
def insert_api_into_dict(full_name, gen_doc_anno=None):
    """
    Resolve ``full_name`` to an object and record it in ``api_info_dict``.

    Entries are keyed by ``id(obj)``, so every alias of one object ends up in
    the ``all_names`` set of a single entry. A new entry also stores the
    object itself, its type name, its cleaned docstring (when it has one) and
    ``gen_doc_anno`` (when given).

    Args:
        full_name(str): dotted name to resolve, e.g. ``paddle.nn.Linear``.
        gen_doc_anno(str|None): optional annotation saved on a new entry.

    Return:
        api_info object or None (when the name can not be resolved)
    """
    try:
        # NOTE: eval() executes the name as an expression. The callers visible
        # in this file build it from module attribute lists; never pass text
        # that comes from an untrusted source.
        obj = eval(full_name)
        fc_id = id(obj)
    except AttributeError:
        logger.warning("AttributeError occurred when `id(eval(%s))`",
                       full_name)
        return None
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop the run.
        logger.warning("Exception occurred when `id(eval(%s))`", full_name)
        return None

    logger.debug("adding %s to api_info_dict.", full_name)
    if fc_id in api_info_dict:
        api_info_dict[fc_id]["all_names"].add(full_name)
    else:
        api_info_dict[fc_id] = {
            "all_names": set([full_name]),
            "id": fc_id,
            "object": obj,
            "type": type(obj).__name__,
        }
        docstr = inspect.getdoc(obj)
        if docstr:
            api_info_dict[fc_id]["docstring"] = inspect.cleandoc(docstr)
        if gen_doc_anno:
            api_info_dict[fc_id]["gen_doc_anno"] = gen_doc_anno
    return api_info_dict[fc_id]
# step 1 fill field : `id` & `all_names`, type, docstring
def process_module(m, attr="__all__"):
    """
    Register every public api that module ``m`` lists in its ``attr``
    attribute, plus the public members of each listed class.

    Class properties are registered with the annotation ``class_property``;
    other class members that have a ``__name__`` with ``class_method``.

    Args:
        m(module): the module to inspect.
        attr(str): name of the attribute holding the list of api names.

    Return:
        int: the number of successful registrations (aliases included).
    """
    api_counter = 0
    if not hasattr(m, attr):
        return api_counter

    # may have duplication of api
    for api in set(getattr(m, attr)):
        # an empty or non-string entry can not form a dotted name
        if not isinstance(api, str) or not api:
            continue
        if api[0] == '_':
            continue
        # Exception occurred when `id(eval(paddle.dataset.conll05.test, get_dict))`
        if ',' in api:
            continue

        # api's fullname
        full_name = m.__name__ + "." + api
        api_info = insert_api_into_dict(full_name)
        if api_info is None:
            continue
        api_counter += 1
        if not inspect.isclass(api_info['object']):
            continue

        for name, value in inspect.getmembers(api_info['object']):
            if name.startswith("_"):
                continue
            method_full_name = full_name + '.' + name  # value.__name__
            if name and value and isinstance(value, property):
                anno = 'class_property'
            elif hasattr(value, '__name__'):
                anno = 'class_method'
            else:
                continue
            if insert_api_into_dict(method_full_name, anno) is not None:
                api_counter += 1
    return api_counter
# step 3 fill field : args, src_file, lineno, end_lineno, short_name, full_name, module_name, doc_filename
def set_source_code_attrs():
    """
    Find the module behind every entry of ``api_info_dict`` and let
    ``parse_module_file`` fill in the source related fields.

    Module entries additionally get ``src_file`` (when the module has a
    usable ``__file__``), ``full_name``, ``package`` and ``short_name``.
    """
    # length of the path prefix that precedes 'paddle/' in a source path
    prefix_len = len(paddle.__path__[0]) - len('paddle/')
    # ast module has end_lineno attr after py 3.8
    for id_api in api_info_dict:
        info = api_info_dict[id_api]
        obj = info["object"]
        type_name = info["type"]
        logger.debug("processing %s:%s:%s", type_name, info["id"], str(obj))

        if type_name == "module":
            mod_file = getattr(obj, '__file__', None)
            if mod_file is not None and len(mod_file) > prefix_len:
                info["src_file"] = mod_file[prefix_len:]
            parse_module_file(obj)
            info["full_name"] = obj.__name__
            info["package"] = obj.__package__
            info["short_name"] = split_name(obj.__name__)[1]
        elif hasattr(obj, '__module__') and obj.__module__ in sys.modules:
            parse_module_file(sys.modules[obj.__module__])
        elif hasattr(obj, '__name__'):
            owner_name, _ = split_name(obj.__name__)
            if owner_name in sys.modules:
                parse_module_file(sys.modules[owner_name])
            else:
                logger.debug("{}, {}, {}".format(info["id"], info["type"],
                                                 info["all_names"]))
        else:
            # last resort: try the module part of every known alias
            parsed_any = False
            for alias in info["all_names"]:
                owner_name, _ = split_name(alias)
                if owner_name in sys.modules:
                    parse_module_file(sys.modules[owner_name])
                    parsed_any = True
            if not parsed_any:
                logger.debug("{}, {}, {}".format(info["id"], info["type"],
                                                 info["all_names"]))
def split_name(name):
    """
    Split a dotted name at its last dot.

    Args:
        name(str): a dotted name such as ``paddle.nn.Linear``.

    Return:
        list: ``[module_part, short_name]``. The module part is ``''`` when
        the name has no dot. A non-string ``name`` (e.g. ``None``) is handed
        back unchanged as ``['', name]``.
    """
    if not isinstance(name, str):
        # explicit guard instead of a bare ``except`` around the lookup
        return ['', name]
    head, _sep, tail = name.rpartition('.')
    return [head, tail]
def parse_module_file(mod):
    """
    Fill the source related fields of the apis that live in module ``mod``.

    Every module is handled once (tracked in ``parsed_mods``) and only
    modules whose name starts with ``paddle`` are inspected.

    * ``.py`` modules are parsed with ``ast``. For each public top-level
      class, function or assignment target that resolves to an object in
      ``api_info_dict``, the fields ``src_file``, ``doc_filename``,
      ``full_name``, ``short_name``, ``module_name``, ``lineno``,
      ``end_lineno`` (when available) and ``args`` (functions, and classes
      that define ``__init__``) are set.
    * other modules (pybind11 ...) are scanned through ``mod.__dict__`` and
      get the same fields except the line numbers and ``args``.

    An entry that already has ``lineno`` is never overwritten.

    Args:
        mod(module): the module to inspect.
    """
    if mod in parsed_mods:
        return
    parsed_mods[mod] = True

    src_file_start_ind = len(paddle.__path__[0]) - len('paddle/')
    has_end_lineno = sys.version_info > (3, 8)
    if not (hasattr(mod, '__name__') and hasattr(mod, '__file__')):
        return
    src_file = mod.__file__
    mod_name = mod.__name__
    logger.debug("parsing %s:%s", mod_name, src_file)
    if not mod_name.startswith('paddle'):
        return
    if src_file is None:
        # e.g. a namespace package: there is no file to inspect
        return

    if os.path.splitext(src_file)[1].lower() == '.py':
        # close the file promptly; Python source defaults to utf-8
        with open(src_file, "r", encoding="utf-8") as src:
            mod_ast = ast.parse(src.read())
        for node in mod_ast.body:
            short_names = []
            if (isinstance(node, (ast.ClassDef, ast.FunctionDef)) and
                    hasattr(node, 'name') and
                    hasattr(sys.modules[mod_name], node.name) and
                    node.name[0] != '_'):
                short_names.append(node.name)
            elif isinstance(node, ast.Assign):
                for target in node.targets:
                    if hasattr(target, 'id') and target.id[0] != '_':
                        short_names.append(target.id)

            for short_name in short_names:
                obj_full_name = mod_name + '.' + short_name
                logger.debug("processing %s", obj_full_name)
                try:
                    obj_this = eval(obj_full_name)
                    obj_id = id(obj_this)
                except Exception:
                    logger.warning("%s maybe %s.%s", obj_full_name,
                                   mod.__package__, short_name)
                    if not mod.__package__:
                        # no package name to retry with
                        continue
                    obj_full_name = mod.__package__ + '.' + short_name
                    try:
                        obj_this = eval(obj_full_name)
                        obj_id = id(obj_this)
                    except Exception:
                        continue

                if obj_id in api_info_dict and "lineno" not in api_info_dict[
                        obj_id]:
                    info = api_info_dict[obj_id]
                    info["src_file"] = src_file[src_file_start_ind:]
                    info["doc_filename"] = obj_full_name.replace('.', '/')
                    info["full_name"] = obj_full_name
                    info["short_name"] = short_name
                    info["module_name"] = mod_name
                    info["lineno"] = node.lineno
                    if has_end_lineno:
                        info["end_lineno"] = node.end_lineno
                    if isinstance(node, ast.FunctionDef):
                        info["args"] = gen_functions_args_str(node)
                    elif isinstance(node, ast.ClassDef):
                        # a class is described by the arguments of __init__
                        for n in node.body:
                            if hasattr(n, 'name') and n.name == '__init__':
                                info["args"] = gen_functions_args_str(n)
                                break
                else:
                    logger.debug("%s omitted", obj_full_name)
    else:  # pybind11 ...
        # iterate over a snapshot: resolving names may touch mod.__dict__
        for short_name in list(mod.__dict__):
            if short_name[0] == '_':
                continue
            obj_full_name = mod_name + '.' + short_name
            logger.debug("processing %s", obj_full_name)
            try:
                obj_this = eval(obj_full_name)
                obj_id = id(obj_this)
            except Exception:
                logger.warning("%s eval error", obj_full_name)
                continue
            if obj_id in api_info_dict and "lineno" not in api_info_dict[
                    obj_id]:
                info = api_info_dict[obj_id]
                info["src_file"] = src_file[src_file_start_ind:]
                info["full_name"] = obj_full_name
                info["short_name"] = short_name
                info["module_name"] = mod_name
                info["doc_filename"] = obj_full_name.replace('.', '/')
            else:
                logger.debug("%s omitted", obj_full_name)
def _default_value_to_str(default):
    """
    Render the AST node of a default value as text, or None when it can not
    be rendered. Names and constants are rendered with ``str()``; any other
    expression goes through ``ast.unparse`` where that function exists.
    """
    if isinstance(default, ast.Name):
        return str(default.id)
    if isinstance(default, ast.Constant):
        return str(default.value)
    unparse = getattr(ast, 'unparse', None)  # added in Python 3.9
    if unparse is not None:
        return unparse(default)
    return None


def gen_functions_args_str(node):
    """
    Build the argument list of a function definition as one string.

    Arguments named ``self`` are left out. Positional-only arguments are
    followed by ``/``; keyword-only arguments are preceded by ``*`` (or by
    ``*args``) and are listed even when they have no default value.

    Args:
        node(ast.AST): the node to describe.

    Return:
        str: e.g. ``"x, y=1, *args, k, **kw"``; ``''`` when ``node`` is not
        an ``ast.FunctionDef``.
    """
    if not isinstance(node, ast.FunctionDef):
        return ''

    # 'args', 'defaults', 'kw_defaults', 'kwarg', 'kwonlyargs', 'posonlyargs', 'vararg'
    spec = node.args
    posonly = list(getattr(spec, 'posonlyargs', []))
    positional = posonly + list(spec.args)
    # the defaults belong to the last len(defaults) positional arguments
    first_default = len(positional) - len(spec.defaults)

    str_args_list = []
    for idx, arg in enumerate(positional):
        if arg.arg != 'self':
            text = arg.arg
            if idx >= first_default:
                default = _default_value_to_str(
                    spec.defaults[idx - first_default])
                if default is not None:
                    text += '=' + default
            str_args_list.append(text)
        if posonly and idx == len(posonly) - 1 and str_args_list:
            str_args_list.append('/')

    if spec.vararg is not None:
        str_args_list.append('*' + spec.vararg.arg)
    if len(spec.kwonlyargs) > 0:
        if spec.vararg is None:
            str_args_list.append('*')
        for kwoarg, d in zip(spec.kwonlyargs, spec.kw_defaults):
            # d is None for a keyword-only argument without default
            default = None if d is None else _default_value_to_str(d)
            if default is None:
                str_args_list.append(kwoarg.arg)
            else:
                str_args_list.append("{}={}".format(kwoarg.arg, default))
    if spec.kwarg is not None:
        str_args_list.append('**' + spec.kwarg.arg)

    return ', '.join(str_args_list)
# step 2 fill field : `display`
def _read_api_name_list(filename):
    """
    Read one api name per line from ``filename`` into a set. Blank lines are
    ignored. A missing file is reported and gives an empty set.
    """
    if not os.path.exists(filename):
        logger.warning("file not exists: %s", filename)
        return set()
    with open(filename, "r") as name_file:
        stripped = (line.strip() for line in name_file)
        return set(name for name in stripped if name)


def set_display_attr_of_apis():
    """
    set the display attr

    An api whose name is listed in ``DISPLAY_DOC_LIST_FILENAME`` gets
    ``display = True``. Otherwise, an api with a name that starts with an
    entry of ``NOT_DISPLAY_DOC_LIST_FILENAME`` gets ``display = False``.
    Any other api is left without a ``display`` field.
    """
    display_none_apis = _read_api_name_list(NOT_DISPLAY_DOC_LIST_FILENAME)
    display_yes_apis = _read_api_name_list(DISPLAY_DOC_LIST_FILENAME)
    logger.info(
        'display_none_apis has %d items, display_yes_apis has %d items',
        len(display_none_apis), len(display_yes_apis))

    # Blank lines were dropped while reading: an empty prefix would match
    # every name and hide all the apis.
    hidden_prefixes = tuple(display_none_apis)

    # every alias of an api shares the same setting
    for id_api in api_info_dict:
        all_names = api_info_dict[id_api]["all_names"]
        if any(n in display_yes_apis for n in all_names):
            api_info_dict[id_api]["display"] = True
        elif any(n.startswith(hidden_prefixes) for n in all_names):
            api_info_dict[id_api]["display"] = False
            logger.info("set {} display to False".format(id_api))
def set_api_sketch():
    """
    set the in_api_sktech attr. may replace the set_display_attr_of_apis.

    The public names of a fixed list of paddle modules (their ``__all__``,
    or else every ``dir()`` name without a leading double underscore) and
    the public attributes of ``paddle.Tensor`` are looked up in
    ``api_info_dict``. A matching entry gets ``in_api_sketch = True`` and
    the name is appended to its ``api_sketch_names``. Names without a
    matching entry are reported in one warning.
    """
    global api_info_dict
    modulelist = [  #noqa
        paddle,
        paddle.amp,
        paddle.nn,
        paddle.nn.functional,
        paddle.nn.initializer,
        paddle.nn.utils,
        paddle.static,
        paddle.static.nn,
        paddle.io,
        paddle.jit,
        paddle.metric,
        paddle.distribution,
        paddle.optimizer,
        paddle.optimizer.lr,
        paddle.regularizer,
        paddle.text,
        paddle.utils,
        paddle.utils.download,
        paddle.utils.profiler,
        paddle.utils.cpp_extension,
        paddle.utils.unique_name,
        paddle.sysconfig,
        paddle.vision,
        paddle.vision.datasets,
        paddle.vision.models,
        paddle.vision.transforms,
        paddle.vision.ops,
        paddle.distributed,
        paddle.distributed.fleet,
        paddle.distributed.fleet.utils,
        paddle.distributed.parallel,
        paddle.distributed.utils,
        paddle.callbacks,
        paddle.hub,
        paddle.autograd,
        paddle.incubate,
        paddle.inference,
        paddle.onnx,
        paddle.device
    ]

    alldict = {}
    for module in modulelist:
        if hasattr(module, '__all__'):
            old_all = module.__all__
        else:
            old_all = [
                item for item in dir(module) if not item.startswith('__')
            ]
        alldict[module.__name__] = old_all

    alldict['paddle.Tensor'] = [
        item for item in dir(paddle.Tensor) if not item.startswith('_')
    ]

    all_api_found = {}
    for m, apis in alldict.items():
        for api in apis:
            all_api_found['{}.{}'.format(m, api)] = False

    # Index name -> entry id once, instead of scanning the whole dict for
    # every name. setdefault keeps the first entry in dict order.
    name_to_id = {}
    for id_api, info in api_info_dict.items():
        for name in info.get('all_names', ()):
            name_to_id.setdefault(name, id_api)

    for api in all_api_found:
        if api not in name_to_id:
            continue
        info = api_info_dict[name_to_id[api]]
        all_api_found[api] = True
        info['in_api_sketch'] = True
        info.setdefault('api_sketch_names', []).append(api)

    api_not_in_dict = [api for api in all_api_found if not all_api_found[api]]
    if api_not_in_dict:
        logger.warning("some apis are not in api_info_dict: %s",
                       str(api_not_in_dict))
# step fill field: referenced_from
def set_referenced_from_attr():
    """
    set the referenced_from field.
    values are the guides and tutorial documents.

    ``referenced_from_apis_dict`` (document file -> api names) is inverted,
    and every api that resolves to an entry of ``api_info_dict`` gets a
    list of ``{'file': ..., 'title': ...}`` items. The title is ``''`` when
    the file has none in ``referenced_from_file_titles``. Nothing happens
    while either of the two dicts is empty.
    """
    global api_info_dict
    global referenced_from_apis_dict, referenced_from_file_titles
    if not (referenced_from_apis_dict and referenced_from_file_titles):
        return

    # api name -> documents that call it
    rev_apis_refers = {}
    for docfn, apis in referenced_from_apis_dict.items():
        for api in apis:
            rev_apis_refers.setdefault(api, []).append(docfn)

    for api, docfns in rev_apis_refers.items():
        try:
            # NOTE(review): eval() runs text taken from the documents as an
            # expression; make sure only trusted documents are scanned.
            m = eval(api)
        except AttributeError:
            logger.warning("AttributeError: %s", api)
            continue
        except Exception as exc:
            # e.g. NameError or SyntaxError for a name that is not an
            # importable paddle path: skip it, keep processing the others
            logger.warning("%s: %s", type(exc).__name__, api)
            continue

        api_id = id(m)
        if api_id in api_info_dict:
            api_info_dict[api_id]["referenced_from"] = [{
                'file': a,
                'title': referenced_from_file_titles.get(a, '')
            } for a in docfns]
        else:
            logger.warning("%s (id:%d) not in the api_info_dict.", api,
                           api_id)
def collect_referenced_from_infos(docdirs):
    """
    Gather the referenced_from infos of the documents under ``docdirs``
    (../guides and ../tutorial) and keep them in the two module-level dicts.
    """
    global referenced_from_apis_dict, referenced_from_file_titles
    extracted = extract_api_from_docs.extract_all_infos(docdirs)
    referenced_from_apis_dict, referenced_from_file_titles = extracted
def get_shortest_api(api_list):
    """
    find the shortest api name (suggested name) in list.

    The length of a name is its number of dot-separated fields. Of equally
    short names, the one that comes first in ``api_list`` wins.

    Problems:
    1. fluid - if there is any apis don't contain 'fluid' in name, use them.
    2. core vs core_avx - using the 'core'.

    Args:
        api_list(list): all the full names of one api.

    Return:
        str|None: the suggested name; None when ``api_list`` is empty.
    """
    if len(api_list) == 1:
        return api_list[0]

    def field_count(name):
        return len(name.split('.'))

    def shortest(names):
        # min() returns the first of several equally short names
        return min(names, key=field_count) if names else None

    candidates = list(api_list)
    # Leave the fluid names out only when another name remains; when every
    # alias contains 'fluid', they are still valid candidates.
    non_fluid = [n for n in candidates if 'fluid' not in n.split('.')]
    if non_fluid:
        candidates = non_fluid

    sn = shortest([n for n in candidates if 'core_avx' not in n.split('.')])
    if sn is None:
        sn = shortest(candidates)
    return sn
def remove_all_en_files(path="./paddle"):
    """
    Delete every file below ``path`` whose name ends with ``en_suffix``.
    """
    for dirpath, _subdirs, filenames in os.walk(path):
        en_files = [fn for fn in filenames if fn.endswith(en_suffix)]
        for fn in en_files:
            os.remove(os.path.join(dirpath, fn))
def is_Tensor_method(api_info):
    """
    Tell whether one of the api's names starts with ``paddle.Tensor``.
    """
    return any(
        alias.startswith('paddle.Tensor') for alias in api_info['all_names'])
# using `doc_filename`
def gen_en_files(api_label_file="api_label"):
    """
    generate all the en doc files.

    One ``<doc_filename>_en.rst`` file is written for every entry of
    ``api_info_dict`` unless the entry

    * has a ``full_name`` ending with ``Overview``,
    * has ``display`` set to False,
    * has one of the skipped types (module, method, ...),
    * is a class method that does not belong to ``paddle.Tensor``,
    * has no ``doc_filename``, or
    * already has its file on disk.

    For every generated file a line ``<api name>\\t.. <ref name>:`` is
    written to ``api_label_file``.

    Args:
        api_label_file(str): path of the label file to (over)write.
    """
    skipped_types = [
        'module', 'method', 'VarType', 'builtin_function_or_method', 'dict',
        'float', 'str'
    ]
    with open(api_label_file, 'w') as api_label:
        for id_api, api_info in api_info_dict.items():
            if 'full_name' in api_info and api_info['full_name'].endswith(
                    'Overview'):
                continue
            if "display" in api_info and not api_info["display"]:
                logger.debug("{} display False".format(id_api))
                continue
            if 'type' in api_info and api_info['type'] in skipped_types:
                continue
            if 'gen_doc_anno' in api_info and api_info[
                    'gen_doc_anno'] == 'class_method' and (
                        not is_Tensor_method(api_info)):
                continue
            if "doc_filename" not in api_info:
                logger.debug(
                    "{} does not have doc_filename field.".format(id_api))
                continue

            logger.debug(api_info["doc_filename"])
            path = os.path.dirname(api_info["doc_filename"])
            # a doc_filename without directory part gives '' here, which
            # os.makedirs can not handle
            if path and not os.path.exists(path):
                os.makedirs(path)
            f = api_info["doc_filename"] + en_suffix
            if os.path.exists(f):
                continue
            gen = EnDocGenerator(api_info)
            api_name, api_ref_name = gen()
            if api_name and api_ref_name:
                api_label.write("{}\t.. {}:\n".format(api_name, api_ref_name))
def check_cn_en_match(path="./paddle", diff_file="en_cn_files_diff"):
    """
    List the documents below ``path`` that lack their counterpart in the
    other language.

    ``diff_file`` gets a header line followed by one tab-separated line per
    unmatched document: the existing file, then the missing one.
    """
    row = "{}\t{}\n"
    with open(diff_file, 'w') as fo:
        fo.write(row.format("exist", "not_exits"))
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith(en_suffix):
                    counterpart = file.replace(en_suffix, cn_suffix)
                elif file.endswith(cn_suffix):
                    counterpart = file.replace(cn_suffix, en_suffix)
                else:
                    continue
                missing = os.path.join(root, counterpart)
                if not os.path.exists(missing):
                    fo.write(row.format(os.path.join(root, file), missing))
class EnDocGenerator(object):
    """
    Write the English reST stub (``<doc_filename>_en.rst``) of one api.

    The stub holds a reminder comment, a reference label, a header and an
    ``autoclass`` / ``autofunction`` directive. Calling the instance writes
    the file and returns ``(api_name, api_ref_name)``, or ``(None, None)``
    when nothing was generated.
    """

    def __init__(self, api_info):
        """
        Args:
            api_info(dict): one entry of ``api_info_dict``. The api name is
                taken from ``suggested_name``, or else from ``full_name``.
        """
        self.api_info = api_info
        if 'suggested_name' in self.api_info:
            self.api_name = self.api_info['suggested_name']
        elif 'full_name' in self.api_info:
            self.api_name = self.api_info['full_name']
        else:
            logger.warning("%s has no attr called full_name/suggested_name",
                           str(self.api_info))
            self.api_name = None
        self.api_ref_name = '_api_' + self.api_name.replace(
            '.', '_') if self.api_name else None
        # discarding the api_info['short_name'], cause it may be different.
        _, self.short_name = split_name(self.api_name)
        self.stream = None
        self.object = None

    @contextlib.contextmanager
    def guard(self, filename):
        """
        Open ``filename`` for writing as ``self.stream`` for the duration of
        the ``with`` block. The stream is closed and reset even when the
        block raises.
        """
        assert self.stream is None, "stream must be None"
        self.stream = open(filename, 'w')
        try:
            yield
        finally:
            self.stream.close()
            self.stream = None

    def print_item(self):
        """
        Write the class or function section that matches ``self.object``.
        """
        if isinstance(self.object, type):
            self.print_class()
        elif isinstance(self.object, types.FunctionType):
            self.print_function()
        else:
            logger.warning("%s: not supported type %s",
                           str(self.api_name), type(self.object))

    def print_header_reminder(self):
        """
        Write the reST comment telling that the file is generated.
        """
        # the second line is indented to stay inside the reST comment
        self.stream.write(
            ".. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`\n"
            "    !DO NOT EDIT THIS FILE MANUALLY!\n")

    def _print_ref_(self):
        """
        Write the reference label of the api (nothing without an api name).
        """
        if self.api_name is None:
            return
        self.stream.write(".. {}:\n\n".format(self.api_ref_name))

    def _print_header_(self, name, dot, is_title):
        """
        Write ``name`` as a reST header underlined with ``dot`` (and also
        overlined when ``is_title`` is true). Trailing underscores of the
        name are escaped.
        """
        mo = re.match(r'^(.*?)(_+)$', name)
        if mo:
            name = mo.group(1) + r'\_' * len(mo.group(2))
        dot_line = dot * len(name)
        if is_title:
            self.stream.write(dot_line)
            self.stream.write('\n')
        self.stream.write(name)
        self.stream.write('\n')
        self.stream.write(dot_line)
        self.stream.write('\n')
        self.stream.write('\n')

    def print_class(self):
        """
        Write the section of a class, with the ``autoclass`` options picked
        by the prefix of the api name.
        """
        self._print_ref_()
        self._print_header_(self.short_name, dot='-', is_title=False)

        # directive options must be indented below their directive
        cls_templates = {
            'default': (".. autoclass:: {0}\n"
                        "    :members:\n"
                        "    :inherited-members:\n"
                        "    :noindex:\n"),
            'no-inherited': (".. autoclass:: {0}\n"
                             "    :members:\n"
                             "    :noindex:\n"),
            'fluid.optimizer':
            (".. autoclass:: {0}\n"
             "    :members:\n"
             "    :inherited-members:\n"
             "    :exclude-members: apply_gradients, apply_optimize, backward, load\n"
             "    :noindex:\n"),
        }
        no_inherited_prefixes = ('fluid.dygraph', 'paddle.vision',
                                 'paddle.callbacks', 'paddle.hapi.callbacks',
                                 'paddle.io', 'paddle.nn')
        optimizer_prefixes = ("paddle.optimizer", "fluid.optimizer")
        if self.api_name.startswith(no_inherited_prefixes):
            tmpl = 'no-inherited'
        elif self.api_name.startswith(optimizer_prefixes):
            tmpl = 'fluid.optimizer'
        else:
            tmpl = 'default'
        self.stream.write(cls_templates[tmpl].format(self.api_name))

    def print_function(self):
        """
        Write the section of a function.
        """
        self._print_ref_()
        self._print_header_(self.short_name, dot='-', is_title=False)
        self.stream.write(".. autofunction:: {}\n"
                          "    :noindex:\n".format(self.api_name))

    def __call__(self):
        """
        generate the rst file.

        Return:
            tuple: ``(api_name, api_ref_name)`` of the written file, or
            ``(None, None)`` when the api can not be resolved, is neither a
            class nor a python function, or has no name.
        """
        try:
            if 'object' in self.api_info:
                self.object = self.api_info['object']
            elif self.api_name is not None:
                self.object = eval(self.api_name)
            else:
                logger.warning(
                    "%s has no attr called object/full_name/suggested_name",
                    str(self.api_info))
                return None, None
        except AttributeError:
            logger.warning("attribute error for %s ", str(self.api_info))
            return None, None

        if (not isinstance(self.object, type)) and (
                not isinstance(self.object, types.FunctionType)):
            logger.warning("%s: not supported type %s",
                           str(self.api_name), type(self.object))
            return None, None
        if not self.api_name:
            # always hand back a pair so that callers can unpack the result
            return None, None

        filename = self.api_info['doc_filename'] + en_suffix
        with self.guard(filename):
            self.print_header_reminder()
            self.print_item()
        return self.api_name, self.api_ref_name
def insert_suggested_names():
    """
    Add the suggested_name field, update the doc_filename and turn
    all_names into a sorted list, for every entry of ``api_info_dict``.
    """
    core_variant = re.compile(r'paddle\.fluid\.core_[\w\d]+\.(.*)$')
    for id_api in api_info_dict:
        entry = api_info_dict[id_api]
        names = entry.setdefault("all_names", set())
        if "full_name" in entry and entry["full_name"] not in names:
            names.add(entry["full_name"])
        for alias in list(names):
            # paddle.fluid.core_avx.* -> paddle.fluid.core.*
            matched = core_variant.match(alias)
            if matched:
                names.add('paddle.fluid.core.' + matched.group(1))
        entry["all_names"] = sorted(names)
        suggested = get_shortest_api(entry["all_names"])
        if suggested:
            # Delete alias_name, api_info_dict[id_api]["alias_name"] = sn
            entry["suggested_name"] = suggested
            entry["doc_filename"] = suggested.replace('.', '/')
def filter_out_object_of_api_info_dict():
    """
    Drop the 'object' field of every entry before dumping the json string.
    """
    for entry in api_info_dict.values():
        entry.pop('object', None)
def extract_code_blocks_from_docstr(docstr):
    """
    extract code-blocks from the given docstring.

    DON'T include the multiline-string definition in code-blocks.
    The *Examples* section must be the last.

    Only the text from the first ``Example:`` / ``Examples:`` on is scanned.
    A code-block starts at a ``code-block:: python`` directive and ends at
    the next directive, at a line that is indented less than the first code
    line (lines starting with ``#`` excepted) or at the end of the docstring.
    A ``:name:`` option gives the block its name; a comment
    ``# required: xxx`` (or ``requires``) gives its requirement.

    Args:
        docstr(str): docstring

    Return:
        code_blocks: A list of code-blocks, indent removed.
                     element {'name': the code-block's name, 'id': sequence id.
                              'codes': codes, 'required': 'gpu'}
    """
    code_blocks = []

    mo = re.search(r"Examples?:", docstr)
    if mo is None:
        return code_blocks
    ds_list = docstr[mo.start():].replace("\t", '    ').split("\n")
    lastlineindex = len(ds_list) - 1

    cb_start_pat = re.compile(r"code-block::\s*python")
    cb_param_pat = re.compile(r"^\s*:(\w+):\s*(\S*)\s*$")
    # [sd], not [s|d]: inside a character class '|' is a literal character
    cb_required_pat = re.compile(r"^\s*#\s*require[sd]\s*:\s*(\S+)\s*$")

    # state of the block being collected
    cb_info = {}
    cb_info['cb_started'] = False
    cb_info['cb_cur'] = []
    cb_info['cb_cur_indent'] = -1
    cb_info['cb_cur_name'] = None
    cb_info['cb_cur_seq_id'] = 0
    cb_info['cb_required'] = None

    def _cb_started():
        # a new block begins: next sequence id, no name/requirement yet
        cb_info['cb_started'] = True
        cb_info['cb_cur_seq_id'] += 1
        cb_info['cb_cur_name'] = None
        cb_info['cb_required'] = None

    def _append_code_block():
        # save the collected lines as a finished block, indent removed
        code_blocks.append({
            'codes': inspect.cleandoc("\n".join(cb_info['cb_cur'])),
            'name': cb_info['cb_cur_name'],
            'id': cb_info['cb_cur_seq_id'],
            'required': cb_info['cb_required'],
        })

    for lineno, linecont in enumerate(ds_list):
        if re.search(cb_start_pat, linecont):
            if not cb_info['cb_started']:
                _cb_started()
                continue
            else:
                # cur block end
                if len(cb_info['cb_cur']):
                    _append_code_block()
                _cb_started()  # another block started
                cb_info['cb_cur_indent'] = -1
                cb_info['cb_cur'] = []
        else:
            if cb_info['cb_started']:
                # handle the code-block directive's options
                mo_p = cb_param_pat.match(linecont)
                if mo_p:
                    if mo_p.group(1) == 'name':
                        cb_info['cb_cur_name'] = mo_p.group(2)
                    continue
                # read the required directive
                mo_r = cb_required_pat.match(linecont)
                if mo_r:
                    cb_info['cb_required'] = mo_r.group(1)
                # docstring end
                if lineno == lastlineindex:
                    mo = re.search(r"\S", linecont)
                    if mo is not None and cb_info[
                            'cb_cur_indent'] <= mo.start():
                        cb_info['cb_cur'].append(linecont)
                    if len(cb_info['cb_cur']):
                        _append_code_block()
                    break
                # check indent for cur block start and end.
                if cb_info['cb_cur_indent'] < 0:
                    mo = re.search(r"\S", linecont)
                    if mo is None:
                        continue
                    # find the first non empty line
                    cb_info['cb_cur_indent'] = mo.start()
                    cb_info['cb_cur'].append(linecont)
                else:
                    mo = re.search(r"\S", linecont)
                    if mo is None:
                        # blank lines inside the block are kept
                        cb_info['cb_cur'].append(linecont)
                        continue
                    if cb_info['cb_cur_indent'] <= mo.start():
                        cb_info['cb_cur'].append(linecont)
                    else:
                        if linecont[mo.start()] == '#':
                            # a less indented comment does not end the block
                            continue
                        else:
                            # block end
                            if len(cb_info['cb_cur']):
                                _append_code_block()
                            cb_info['cb_started'] = False
                            cb_info['cb_cur_indent'] = -1
                            cb_info['cb_cur'] = []
    return code_blocks
def find_last_future_line_end(cbstr):
    """
    Return the offset right after the last line of ``cbstr`` that mentions
    ``__future__`` and ends with a newline, or None when there is no such
    line.
    """
    future_line = re.compile('__future__.*\n')
    end_pos = None
    for found in future_line.finditer(cbstr):
        # matches come in order, so the last one wins
        end_pos = found.end()
    return end_pos
def extract_sample_codes_into_dir():
if os.path.exists(SAMPLECODE_TEMPDIR):
if not os.path.isdir(SAMPLECODE_TEMPDIR):
os.remove(SAMPLECODE_TEMPDIR)
os.mkdir(SAMPLECODE_TEMPDIR)
else:
os.mkdir(SAMPLECODE_TEMPDIR)
for id_api in api_info_dict:
if 'docstring' in api_info_dict[
id_api] and 'full_name' in api_info_dict[id_api]:
code_blocks = extract_code_blocks_from_docstr(
api_info_dict[id_api]['docstring'])
for cb_info in code_blocks:
fn = os.path.join(
SAMPLECODE_TEMPDIR, '{}.sample-code-{}.py'.format(