-
Notifications
You must be signed in to change notification settings - Fork 84
Feature: categorization of notebooks to simplify huge repositories navigation #190
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
0c5b9ee
c8e81f4
fc3d2a7
2f114bc
733573f
97a90f5
d74373e
c31e8a5
fd8523f
f663178
33e7d0e
db67d7d
bda0994
eee88d0
fd947d8
6e73cdc
9fad0e9
ad644b5
f1ddbe3
3263553
ee43675
2cf120e
1563576
0b0b713
3eaefcd
05e210d
ced0ced
df92fab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we please add some documentation/screenshots of the functionality? |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,6 +70,12 @@ def filesystem_default_value(dirname): | |
is_flag=True, | ||
help="If selected, notebooker set current working directory to absolute path of the notebook to keep it local context available", | ||
) | ||
@click.option( | ||
"--categorization", | ||
default=False, | ||
is_flag=True, | ||
help="If selected, discovers only templates with the 'category=example' tags set to any cell and groups notebooks by their category names", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the expected behaviour when multiple cells in the same notebook have clashing categories? |
||
) | ||
@click.option( | ||
"--default-mailfrom", default=DEFAULT_MAILFROM_ADDRESS, help="Set a new value for the default mailfrom setting." | ||
) | ||
|
@@ -91,6 +97,7 @@ def base_notebooker( | |
py_template_subdir, | ||
notebooker_disable_git, | ||
execute_at_origin, | ||
categorization, | ||
default_mailfrom, | ||
running_timeout, | ||
serializer_cls, | ||
|
@@ -106,6 +113,7 @@ def base_notebooker( | |
PY_TEMPLATE_SUBDIR=py_template_subdir, | ||
NOTEBOOKER_DISABLE_GIT=notebooker_disable_git, | ||
EXECUTE_AT_ORIGIN=execute_at_origin, | ||
CATEGORIZATION=categorization, | ||
DEFAULT_MAILFROM=default_mailfrom, | ||
RUNNING_TIMEOUT=running_timeout, | ||
) | ||
|
@@ -180,6 +188,7 @@ def start_webapp( | |
|
||
@base_notebooker.command() | ||
@click.option("--report-name", help="The name of the template to execute, relative to the template directory.") | ||
@click.option("--category", default="", help="Category of the template.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't this come from the cell tags? How come this needs to be passed in? |
||
@click.option( | ||
"--overrides-as-json", default="{}", help="The parameters to inject into the notebook template, in JSON format." | ||
) | ||
|
@@ -230,6 +239,7 @@ def start_webapp( | |
def execute_notebook( | ||
config: BaseConfig, | ||
report_name, | ||
category, | ||
overrides_as_json, | ||
iterate_override_values_of, | ||
report_title, | ||
|
@@ -250,6 +260,7 @@ def execute_notebook( | |
return execute_notebook_entrypoint( | ||
config, | ||
report_name, | ||
category, | ||
overrides_as_json, | ||
iterate_override_values_of, | ||
report_title, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,7 @@ def _run_checks( | |
scheduler_job_id: Optional[str] = None, | ||
mailfrom: Optional[str] = None, | ||
is_slideshow: bool = False, | ||
category: Optional[str] = None, | ||
) -> NotebookResultComplete: | ||
""" | ||
This is the actual method which executes a notebook, whether running in the webapp or via the entrypoint. | ||
|
@@ -152,6 +153,7 @@ def _run_checks( | |
generate_pdf_output=generate_pdf_output, | ||
report_name=template_name, | ||
report_title=report_title, | ||
category=category, | ||
overrides=overrides, | ||
scheduler_job_id=scheduler_job_id, | ||
mailfrom=mailfrom, | ||
|
@@ -164,6 +166,7 @@ def _run_checks( | |
def run_report( | ||
job_submit_time, | ||
report_name, | ||
category, | ||
overrides, | ||
result_serializer, | ||
report_title="", | ||
|
@@ -222,6 +225,7 @@ def run_report( | |
scheduler_job_id=scheduler_job_id, | ||
mailfrom=mailfrom, | ||
is_slideshow=is_slideshow, | ||
category=category, | ||
) | ||
logger.info("Successfully got result.") | ||
result_serializer.save_check_result(result) | ||
|
@@ -234,6 +238,7 @@ def run_report( | |
job_start_time=job_submit_time, | ||
report_name=report_name, | ||
report_title=report_title, | ||
category=category, | ||
error_info=error_info, | ||
overrides=overrides, | ||
mailto=mailto, | ||
|
@@ -257,6 +262,7 @@ def run_report( | |
return run_report( | ||
job_submit_time, | ||
report_name, | ||
category, | ||
overrides, | ||
result_serializer, | ||
report_title=report_title, | ||
|
@@ -351,6 +357,7 @@ def _get_overrides(overrides_as_json: AnyStr, iterate_override_values_of: Option | |
def execute_notebook_entrypoint( | ||
config: BaseConfig, | ||
report_name: str, | ||
category: str, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an |
||
overrides_as_json: str, | ||
iterate_override_values_of: Union[List[str], str], | ||
report_title: str, | ||
|
@@ -377,6 +384,7 @@ def execute_notebook_entrypoint( | |
start_time = datetime.datetime.now() | ||
logger.info("Running a report with these parameters:") | ||
logger.info("report_name = %s", report_name) | ||
logger.info("category = %s", category) | ||
logger.info("overrides_as_json = %s", overrides_as_json) | ||
logger.info("iterate_override_values_of = %s", iterate_override_values_of) | ||
logger.info("report_title = %s", report_title) | ||
|
@@ -407,6 +415,7 @@ def execute_notebook_entrypoint( | |
result = run_report( | ||
start_time, | ||
report_name, | ||
category, | ||
overrides, | ||
result_serializer, | ||
report_title=report_title, | ||
|
@@ -495,6 +504,7 @@ def run_report_in_subprocess( | |
email_subject=None, | ||
n_retries=3, | ||
is_slideshow=False, | ||
category=None, | ||
) -> str: | ||
""" | ||
Execute the Notebooker report in a subprocess. | ||
|
@@ -513,6 +523,7 @@ def run_report_in_subprocess( | |
:param email_subject: `str` if passed, then this string will be used in the email subject | ||
:param n_retries: The number of retries to attempt. | ||
:param is_slideshow: Whether the notebook is a reveal.js slideshow or not. | ||
:param category: Category of the notebook | ||
:return: The unique job_id. | ||
""" | ||
if error_mailto is None: | ||
|
@@ -535,6 +546,7 @@ def run_report_in_subprocess( | |
is_slideshow=is_slideshow, | ||
email_subject=email_subject, | ||
mailfrom=mailfrom, | ||
category=category, | ||
) | ||
|
||
command = ( | ||
|
@@ -578,6 +590,7 @@ def run_report_in_subprocess( | |
+ (["--is-slideshow"] if is_slideshow else []) | ||
+ ([f"--scheduler-job-id={scheduler_job_id}"] if scheduler_job_id is not None else []) | ||
+ ([f"--mailfrom={mailfrom}"] if mailfrom is not None else []) | ||
+ ([f"--category={category}"] if category is not None else []) | ||
+ ([f"--email-subject={email_subject}"] if email_subject else []) | ||
) | ||
p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -209,6 +209,7 @@ def save_check_stub( | |||||
is_slideshow: bool = False, | ||||||
email_subject: Optional[str] = None, | ||||||
mailfrom: Optional[str] = None, | ||||||
category: Optional[str] = None, | ||||||
) -> None: | ||||||
"""Call this when we are just starting a check. Saves a "pending" job into storage.""" | ||||||
job_start_time = job_start_time or datetime.datetime.now() | ||||||
|
@@ -228,6 +229,7 @@ def save_check_stub( | |||||
scheduler_job_id=scheduler_job_id, | ||||||
is_slideshow=is_slideshow, | ||||||
mailfrom=mailfrom, | ||||||
category=category, | ||||||
) | ||||||
self._save_to_db(pending_result) | ||||||
|
||||||
|
@@ -325,6 +327,7 @@ def _convert_result( | |||||
scheduler_job_id=result.get("scheduler_job_id", None), | ||||||
is_slideshow=result.get("is_slideshow", False), | ||||||
email_subject=result.get("email_subject", None), | ||||||
category=result.get("category", None), | ||||||
) | ||||||
elif cls == NotebookResultPending: | ||||||
return NotebookResultPending( | ||||||
|
@@ -344,6 +347,7 @@ def _convert_result( | |||||
stdout=result.get("stdout", []), | ||||||
scheduler_job_id=result.get("scheduler_job_id", None), | ||||||
is_slideshow=result.get("is_slideshow", False), | ||||||
category=result.get("category", None), | ||||||
) | ||||||
|
||||||
elif cls == NotebookResultError: | ||||||
|
@@ -370,6 +374,7 @@ def _convert_result( | |||||
stdout=result.get("stdout", []), | ||||||
scheduler_job_id=result.get("scheduler_job_id", False), | ||||||
is_slideshow=result.get("is_slideshow", False), | ||||||
category=result.get("category", None), | ||||||
) | ||||||
else: | ||||||
raise ValueError("Could not deserialise {} into result object.".format(result)) | ||||||
|
@@ -397,10 +402,17 @@ def _get_result_count(self, base_filter): | |||||
|
||||||
def get_count_and_latest_time_per_report(self, subfolder: Optional[str]): | ||||||
base_filer = {} if not subfolder else {"report_name": {"$regex": subfolder + ".*"}} | ||||||
return self.fetch_reports(base_filer) | ||||||
|
||||||
def get_count_and_latest_time_per_report_per_category(self, category: Optional[str]): | ||||||
base_filer = {} if not category else {"category": category} | ||||||
return self.fetch_reports(base_filer) | ||||||
|
||||||
def fetch_reports(self, base_filer: Dict[str, Any]): | ||||||
reports = list( | ||||||
self._get_raw_results( | ||||||
base_filter=base_filer, | ||||||
projection={"report_name": 1, "job_start_time": 1, "scheduler_job_id": 1, "_id": 0}, | ||||||
projection={"report_name": 1, "job_start_time": 1, "scheduler_job_id": 1, "category": 1, "_id": 0}, | ||||||
limit=0, | ||||||
) | ||||||
) | ||||||
|
@@ -411,7 +423,12 @@ def get_count_and_latest_time_per_report(self, subfolder: Optional[str]): | |||||
for report, all_runs in jobs_by_name.items(): | ||||||
latest_start_time = max(r["job_start_time"] for r in all_runs) | ||||||
scheduled_runs = len([x for x in all_runs if x.get("scheduler_job_id")]) | ||||||
output[report] = {"count": len(all_runs), "latest_run": latest_start_time, "scheduler_runs": scheduled_runs} | ||||||
output[report] = { | ||||||
"count": len(all_runs), | ||||||
"latest_run": latest_start_time, | ||||||
"scheduler_runs": scheduled_runs, | ||||||
"category": r["category"], | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This key will not exist if it is not in the mongo document.
Suggested change
|
||||||
} | ||||||
return output | ||||||
|
||||||
def get_all_results( | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -28,6 +28,9 @@ class BaseConfig: | |||||
# A boolean flag to dictate whether we should execute the notebook at the origin or not. | ||||||
EXECUTE_AT_ORIGIN: bool = False | ||||||
|
||||||
# A boolean flag to dictate whether we should discover and group notebooker by their category tags. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
CATEGORIZATION: bool = False | ||||||
|
||||||
# The serializer class we are using for storage, e.g. PyMongoResultSerializer | ||||||
SERIALIZER_CLS: DEFAULT_SERIALIZER = None | ||||||
# The dictionary of parameters which are used to initialize the serializer class above | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we please revert the version bump until we have ensured the tests pass on master