Skip to content

Commit

Permalink
merging with origin/main
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesfrye committed Nov 5, 2021
2 parents 00dd505 + 43cf46e commit 59d23cd
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 230 deletions.
2 changes: 0 additions & 2 deletions .flake8

This file was deleted.

5 changes: 0 additions & 5 deletions _SUMMARY.md
Original file line number Diff line number Diff line change
@@ -1,5 +0,0 @@
# Table of contents

* [Documentation Generation](README.md)
{docugen}

79 changes: 64 additions & 15 deletions config.ini
Original file line number Diff line number Diff line change
@@ -1,35 +1,84 @@
[GLOBAL]
# global variables used in scripts
DIRNAME=ref
LIBRARY_DIRNAME=python

[DIRNAMES_TO_TITLES]
# key-value map taking directory names to their titles in the Gitbook sidebar
# auto-generated dirname/title maps are added in scripts
ref=Reference
cli=Command Line Interface
java=Java Library \[Beta\]
python=Python Library
data-types=Data Types
public-api=Import & Export API
python=Python Library
integrations=Integrations
java=Java Library \[Beta\]
keras=Keras

[SKIPS]
# subdirectories of ref/ to skip when creating table of contents/SUMMARY.md
elements=app,java

###
#
# SUBCONFIGurations for doc generation in each "module"
#
###

[EXAMPLE_SUBCONFIG]
# an example subconfig, all fields are mandatory but some can be empty
# see library.py for how these fields are used
dirname=name-of-directory-for-this
title=Human Readable Title for Sidebar
slug=prefix.for.markdown.filename.
elements=python,objects,that,you,want,to,document,commmaseparated
# see handle_additions in library.py for use of add-from and add-elements
add-from=submodule.toadd.elementsfrom
add-elements=elements,from,that,submodule
# see get_dunder_doc in library.py for use of module-doc-from
module-doc-from=other.module.with.dunderdoc

[SUBCONFIGS]
# add your subconfig's name here to document a new module
names=WANDB_CORE,WANDB_DATATYPES,WANDB_API,WANDB_INTEGRATIONS

[WANDB_CORE]
# main python client
dirname=python
title=Python Library
slug=wandb.
elements=Artifact,agent,config,controller,finish,init,log,save,summary,sweep,watch,__version__
add-from=wandb_sdk.wandb_run
add-elements=Run
module-doc-from=self

[WANDB_DATATYPES]
elements=Graph,Image,Plotly,Video,Audio,Table,Html,Object3D,Molecule,Histogram,BoundingBoxes2D,ImageMask
# data types submodule, including media and tables
dirname=data-types
title=Data Types
slug=wandb.data\_types.
elements=Graph,Image,Plotly,Video,Audio,Table,Html,Object3D,Molecule,Histogram
add-from=data_types
add-elements=ImageMask,BoundingBoxes2D
module-doc-from=data_types

[WANDB_API]
elements=Api,Projects,Project,Runs,Run,Sweep,Files,File,Artifact
# public API submodule
dirname=public-api
title=Import & Export API
slug=wandb.apis.public.
elements=
add-from=apis.public
add-elements=Api,Projects,Project,Runs,Run,Sweep,Files,File,Artifact
module-doc-from=apis.public

[WANDB_INTEGRATIONS]
elements=keras,
# - sklearn
# - gym
# - lightgbm
# - sacred
# - tensorflow
# - tensorboard
# - xgboost
# - fastai
# - torch
# - sagemaker
# integrations with other libraries that we host the code for
dirname=integrations
title=Integrations
# slugs for integrations are handled differently, see generate.py
slug=wandb.
elements=keras
add-from=
add-elements=ValidationDataLogger
module-doc-from=
152 changes: 93 additions & 59 deletions generate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""A tool to generate reference documentation for the
Weights & Biases client library and for the wandb CLI tool.
"""Generate reference documentation for Weights & Biases.
Creates docs for the Weights & Biases client library and for the wandb CLI tool.
For help, run:
Expand All @@ -16,6 +17,8 @@
import cli
import library

import util


config = configparser.ConfigParser()
config.read("config.ini")
Expand All @@ -24,6 +27,12 @@
DIRNAMES_TO_TITLES = config["DIRNAMES_TO_TITLES"]
SKIPS = config["SKIPS"]["elements"].split(",")

subconfig_names = config["SUBCONFIGS"]["names"].split(",")

subconfigs = util.process_subconfigs(config, subconfig_names)

WANDB_CORE, WANDB_DATATYPES, WANDB_API, WANDB_INTEGRATIONS = subconfigs


def main(args):
commit_id = args.commit_id
Expand All @@ -42,31 +51,30 @@ def main(args):
continue
shutil.rmtree(os.path.join(ref_dir, dirname), ignore_errors=True)

# Create the library docs
# create the library docs
library.build(commit_id, code_url_prefix, output_dir)

# convert .build output to GitBook format
rename_to_readme(ref_dir)

# Create the CLI docs
# create the CLI docs
cli.build(ref_dir)

# Change Folder with single README to file.md
# change folders with single README to file.md
single_folder_format(ref_dir)

# fill the SUMMARY.md with generated doc files,
# based on provided template.
populate_summary(ref_dir, template_file, output_dir=output_dir)
populate_summary(output_dir, template_file, output_dir=output_dir)

# clean_names(ref_dir)
# clean up the file names
clean_names(ref_dir)


def populate_summary(
docgen_folder: str, template_file: str = "_SUMMARY.md", output_dir: str = "."
) -> None:
"""Populates the output file with generated file names
by filling in the template_file at the {docugen} location.
"""Populates SUMMARY.md file describing gitbook sidebar.
GitBook uses a `SUMMARY.md` file to determine which
files to show in the sidebar. When using docugen,
Expand All @@ -80,45 +88,41 @@ def populate_summary(
output_dir: str. Directory into which to write the final
SUMMARY.md file.
"""
docugen_markdown = walk_docugen("ref", output_dir=Path(docgen_folder), base=Path(docgen_folder))

with open(template_file, "r") as f:
doc_structure = f.read()

docugen_markdown = walk_docugen(docgen_folder)
old_summary = f.readlines()
doc_structure = clean_summary(old_summary)

doc_structure = doc_structure.format(docugen=docugen_markdown)

with open(os.path.join(output_dir, "SUMMARY.md"), "w") as f:
f.write(doc_structure)


def walk_docugen(folder: str) -> str:
"""Walks a folder, pulls out all of the markdown files,
formats their names into markdown strings with appropriate links
and formatting for a GitBook SUMMARY.md, then returns that block of markdown.
"""
def walk_docugen(folder: str, output_dir: Path, base: Path) -> str:
"""Walk a folder and return a markdown-formatted list of markdown files."""
path, dirs, files = next(os.walk(base / folder))
dirs.sort(), files.sort() # ensure alphabetical order for directories and files

docugen_markdowns = []
indent = 0
for path, dirs, files in os.walk(folder):
if any("ref/" + skip in path for skip in SKIPS):
continue
dirs.sort()
files.sort()
path = str(Path(path).relative_to(Path(folder).parent))
is_subdir = "/" in path
if is_subdir:
components = path.split("/")
indent = len(components) - 1
name = components[-1]
else:
name = path
title = convert_name(name)
docugen_markdowns.append(" " * indent + f"* [{title}]({path}/README.md)")
if any("ref/" + skip in path for skip in SKIPS): # apply skipping of directories
return ""

# extract title information
path = Path(path)
indent, title, relative_path = get_info_markdown_path(path, output_dir)
docugen_markdown = " " * indent + f"* [{title}]({relative_path}/README.md)\n"

# recursively generate markdown from sub-directories
for dir in dirs:
docugen_markdown += walk_docugen(dir, output_dir, path)

docugen_markdowns.extend(add_files(files, path, indent))
# add files from this directory
docugen_markdown += add_files(files, relative_path, indent)

docugen_markdown = "\n".join(docugen_markdowns)
# if needed, add in a final newline
if not docugen_markdown.endswith("\n"):
docugen_markdown += "\n"

return docugen_markdown

Expand All @@ -138,22 +142,25 @@ def add_files(files: list, root: str, indent: int) -> list:
)
file_markdowns.append(file_markdown)

return file_markdowns
files_markdown = "\n".join(file_markdowns)
return files_markdown


def get_prefix(path):
if path == DIRNAME:
return [], ""
elif "data-types" in path:
return "wandb.data\_types." # noqa
return WANDB_DATATYPES["slug"]
elif "public-api" in path:
return "wandb.apis.public."

return WANDB_API["slug"]
elif "integrations" in path:
starter_slug = WANDB_INTEGRATIONS["slug"]
package_name = path.split("/")[-1]
return f"wandb.{package_name}."
if package_name == "integrations":
package_name = "sdk.integration_utils.data_logging"
return f"{starter_slug}{package_name}."
elif "python" in path:
return "wandb."
return WANDB_CORE["slug"]
elif "java" or "app" in path:
return ""
else:
Expand All @@ -170,9 +177,7 @@ def convert_name(name):


def rename_to_readme(directory):
"""Moves all the folder-level markdown files into
their respective folders, as a README."""

"""Moves all the folder-level markdown files into their respective folders, as a README."""
for root, folders, file_names in os.walk(directory):
for file_name in file_names:
raw_file_name, suffix = file_name[:-3], file_name[-3:]
Expand All @@ -184,7 +189,7 @@ def rename_to_readme(directory):


def clean_names(directory):
"""Converts names to lower case and removes spaces"""
"""Converts names to lower case and removes spaces."""
for root, folders, file_names in os.walk(directory):
for name in file_names:
if name == "README.md":
Expand All @@ -198,17 +203,17 @@ def clean_names(directory):


def single_folder_format(directory):
"""
Convert single file folders to single files.
"""Converts all sub-folders that only contain README.md to single files, as expected by GitBook.
This is done to combat the huge differences generated
by GitBook.
eg.
- folder
- README.md
So the tree:
- folder
- README.md
changes to
- folder.md
- folder.md
Args:
directory: str. The directory to walk through.
"""
for root, folders, file_names in os.walk(directory):
number_of_folders = len(folders)
Expand All @@ -232,6 +237,37 @@ def filter_files(directory, files_to_remove):
os.remove(os.path.join(f"{root}", f"{file_name}"))


def get_info_markdown_path(path, output_dir):
    """Compute the sidebar indent, title, and link path for a docs directory.

    Args:
        path: Path-like object providing `relative_to` (walk_docugen passes
            a pathlib.Path). Must be located under output_dir.
        output_dir: Root directory that the returned path is relative to.

    Returns:
        A tuple `(indent, title, relative_path)` where `indent` is the number
        of components in the relative path minus one, `title` is the result
        of `convert_name` on the last component, and `relative_path` is the
        "/"-separated path of `path` relative to `output_dir`.

    Raises:
        ValueError: raised by `relative_to` if `path` is not under
            `output_dir`.
    """
    # as_posix() keeps "/" as the separator on every platform. str(Path)
    # uses backslashes on Windows, which would make the split below yield
    # a single component (wrong indent) and put backslashes in the links.
    relative_path = path.relative_to(output_dir).as_posix()
    components = relative_path.split("/")
    indent, name = len(components) - 1, components[-1]
    title = convert_name(name)
    return indent, title, relative_path


def clean_summary(summary_contents):
    """Turn the lines of an existing summary into a template string.

    Lines accepted by `is_retained` are kept verbatim. The first rejected
    line is replaced by the literal `{docugen}` placeholder, and every
    later rejected line is dropped.

    Args:
        summary_contents: Iterable of str. Lines of the summary file.

    Returns:
        str. The kept lines and the placeholder, concatenated without any
        added separators.
    """
    kept_lines = []
    placeholder_pending = True
    for line in summary_contents:
        if is_retained(line):
            kept_lines.append(line)
        elif placeholder_pending:
            # only the first dropped line leaves a placeholder behind
            kept_lines.append("{docugen}")
            placeholder_pending = False

    return "".join(kept_lines)


def is_retained(line):
    """Decide whether a line of the existing summary is kept as-is.

    A line is kept when it does not contain "ref/", or when it contains
    "ref/" and at least one entry of SKIPS occurs in it as a substring.

    Args:
        line: str. One line of the summary file.

    Returns:
        bool. True if the line is retained, False otherwise.
    """
    if "ref/" not in line:
        return True
    # lines pointing into ref/ survive only if they mention a skipped entry
    for skip in SKIPS:
        if skip in line:
            return True
    return False


def get_args():
parser = argparse.ArgumentParser(
description="Generate documentation for the wandb library and CLI."
Expand All @@ -250,7 +286,7 @@ def get_args():
"--template_file",
type=str,
default="_SUMMARY.md",
help="Template markdown file with {docugen} where filenames to be written. "
help="Template markdown file for table of contents. "
+ "Defaults to ./_SUMMARY.md",
)
parser.add_argument(
Expand All @@ -277,19 +313,17 @@ def get_args():


def check_commit_id(commit_id):
"""
Checks for a valid commit id.
"""Checks for a valid commit id.
Args:
commit_id: The commit id provided
"""

if "." in commit_id:
# commit_id is a version
wandb_version = f"v{wandb.__version__}"
assert (
wandb_version == commit_id
), f"git version does not match wandb version {wandb_version}"
), f"git version {commit_id} does not match wandb version {wandb_version}"
else:
# commit_id is a git hash
commit_id_len = len(commit_id)
Expand Down
Loading

0 comments on commit 59d23cd

Please sign in to comment.