From d4f033a2f5911cb4a28ce2e6ce4fbe39d860718b Mon Sep 17 00:00:00 2001 From: Peter Adrichem Date: Sat, 18 Jan 2025 15:57:29 +0100 Subject: [PATCH] validate and xp: grouped options and documentation. --- README.rst | 50 +++++++++++++++++--------- docs/changelog.rst | 3 +- docs/validate.rst | 38 +++++++++++--------- docs/xp.rst | 80 +++++++++++++++++++++++------------------ src/xul/cmd/validate.py | 33 +++++++++-------- src/xul/cmd/xp.py | 72 +++++++++++++++++++------------------ 6 files changed, 156 insertions(+), 120 deletions(-) diff --git a/README.rst b/README.rst index 23f30d8..345fbc9 100644 --- a/README.rst +++ b/README.rst @@ -30,8 +30,6 @@ Xul -- XML Utilities :target: https://github.com/psf/black Xul is a set of XML scripts written in Python. -Documentation can be found on `Read The Docs`_. - Xul scripts =========== @@ -60,6 +58,10 @@ Dependencies Xul uses the excellent lxml_ XML toolkit, a Pythonic binding for the C libraries libxml2_ and libxslt_. +Documentation +============= +Xul documentation can be found on `Read The Docs`_. + Options ------- List the command-line options of a Xul script with ``--help``. @@ -67,25 +69,39 @@ For example: .. code:: - $ ppx --help + $ xp --help - usage: ppx [-h] [-V] [-n] [-o] [xml_source [xml_source ...]] + usage: xp [-h] [-V] [-l | -L] [-d DEFAULT_NS_PREFIX] [-e] [-q] [-p] [-r] [-m] xpath_expr [xml_source ...] - Pretty Print XML source in human readable form. + Select nodes in an XML source with an XPath expression. positional arguments: - xml_source XML source (file, , http://...) - - optional arguments: - -h, --help show this help message and exit - -V, --version show program's version number and exit - -n, --no-syntax no syntax highlighting - -o, --omit-declaration - omit the XML declaration - -Documentation -============= -Xul documentation can be found on `Read The Docs`_. + xpath_expr XPath expression + xml_source XML source (file, , http://...) + + options: + -h, --help show this help message and exit + -V, --version show program's version number and exit + -m, --method use ElementTree.xpath method instead of XPath class + + file hit options: + output filenames to standard output + + -l, -f, --files-with-hits + only the names of files with a non-false and non-NaN result are written to standard output + -L, -F, --files-without-hits + only the names of files with a false or NaN result, or without any results are written to + standard output + + namespace options: + -d DEFAULT_NS_PREFIX, --default-prefix DEFAULT_NS_PREFIX + set the prefix for the default namespace in XPath [default: 'd'] + -e, --exslt add EXSLT XML namespaces + -q, --quiet don't print XML source namespaces + + output options: + -p, --pretty-element pretty print the result element + -r, --result-xpath print the XPath expression of the result element (or its parent) W3C standards ------------- diff --git a/docs/changelog.rst b/docs/changelog.rst index aa9913d..8d65292 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -3,7 +3,7 @@ Changelog This document records all notable changes to `Xul `_. -`Unreleased `_ (2025-01-17) +`Unreleased `_ (2025-01-18) -------------------------------------------------------------------------------------- * Drop support for Python < 3.9. * :doc:`xp `: fix boolean result (Python >= 3.12). @@ -14,6 +14,7 @@ This document records all notable changes to `Xul ` * :doc:`transform `: removed ``--xsl-output`` option (always for ``--file``). * Fixed encoding issues. * Clearer error messages. +* Improved documentation; grouped CLI options. * Code checks: ruff, black, isort, mypy (GitHub Action). * Test script for local testing with Docker Compose. * Typing. diff --git a/docs/validate.rst b/docs/validate.rst index 10209d5..de697ff 100644 --- a/docs/validate.rst +++ b/docs/validate.rst @@ -67,30 +67,34 @@ Options $ validate --help - usage: validate [-h] [-V] (-x XSD_SOURCE | -d DTD_SOURCE | -r RELAXNG_SOURCE) - [-f | -F] - [xml_source [xml_source ...]] + usage: validate [-h] [-V] (-x XSD_SOURCE | -d DTD_SOURCE | -r RELAXNG_SOURCE) [-l | -L] [xml_source ...] - Validate XML source with XSD, DTD or RELAX NG. + Validate an XML source with XSD, DTD or RELAX NG. positional arguments: xml_source XML source (file, , http://...) - optional arguments: + options: -h, --help show this help message and exit -V, --version show program's version number and exit + + XML validator: + choose an XML validator: XSD, DTD or RELAX NG + -x XSD_SOURCE, --xsd XSD_SOURCE XML Schema Definition (XSD) source -d DTD_SOURCE, --dtd DTD_SOURCE Document Type Definition (DTD) source -r RELAXNG_SOURCE, --relaxng RELAXNG_SOURCE RELAX NG source - -f, -l, --validated-files - only the names of validated XML files are written to - standard output - -F, -L, --invalidated-files - only the names of invalidated XML files are written to - standard output + + file hit options: + output filenames to standard output + + -l, -f, --validated-files + only the names of validated XML files are written to standard output + -L, -F, --invalidated-files + only the names of invalidated XML files are written to standard output XML Validation @@ -155,10 +159,10 @@ Validate the XML Schema XSD with the Print file names ---------------- .. program:: validate -.. option:: -f, -l, --validated-files +.. option:: -l, -f, --validated-files -The ``-f, -l, --validated-files`` command-line option only prints the names -of validated XML files. +The ``--validated-files`` command-line option only prints the names of validated XML files +(similar to ``grep --files-with-matches``). Find XML files that validate: @@ -167,10 +171,10 @@ Find XML files that validate: validate -x schema.xsd *.xml -l .. program:: validate -.. option:: -F, -L, --invalidated-files +.. option:: -L, -F, --invalidated-files -The ``-F, -L, --invalidated-files`` command-line option only prints the names -of XML files that don't validate. +The ``--invalidated-files`` command-line option only prints the names of XML files that don't validate +(similar to ``grep --files-without-match``). Remove XML files that fail to validate: diff --git a/docs/xp.rst b/docs/xp.rst index fe99727..e350e6d 100644 --- a/docs/xp.rst +++ b/docs/xp.rst @@ -40,7 +40,7 @@ Options $ xp --help - usage: xp [-h] [-V] [-e] [-d DEFAULT_NS_PREFIX] [-q] [-p] [-r] [-f | -F] [-m] xpath_expr [xml_source ...] + usage: xp [-h] [-V] [-l | -L] [-d DEFAULT_NS_PREFIX] [-e] [-q] [-p] [-r] [-m] xpath_expr [xml_source ...] Select nodes in an XML source with an XPath expression. @@ -51,18 +51,26 @@ Options options: -h, --help show this help message and exit -V, --version show program's version number and exit - -e, --exslt add EXSLT XML namespaces - -d DEFAULT_NS_PREFIX, --default-prefix DEFAULT_NS_PREFIX - set the prefix for the default namespace in XPath [default: 'd'] - -q, --quiet don't print XML source namespaces - -p, --pretty-element pretty print the result element - -r, --result-xpath print the XPath expression of the result element (or its parent) - -f, -l, --files-with-hits + -m, --method use ElementTree.xpath method instead of XPath class + + file hit options: + output filenames to standard output + + -l, -f, --files-with-hits only the names of files with a non-false and non-NaN result are written to standard output - -F, -L, --files-without-hits + -L, -F, --files-without-hits only the names of files with a false or NaN result, or without any results are written to standard output - -m, --method use ElementTree.xpath method instead of XPath class + + namespace options: + -d DEFAULT_NS_PREFIX, --default-prefix DEFAULT_NS_PREFIX + set the prefix for the default namespace in XPath [default: 'd'] + -e, --exslt add EXSLT XML namespaces + -q, --quiet don't print XML source namespaces + + output options: + -p, --pretty-element pretty print the result element + -r, --result-xpath print the XPath expression of the result element (or its parent) .. index:: @@ -90,6 +98,27 @@ List the XPath expressions of all elements with attributes: xp -r "//@*" file.xml +.. index:: + single: xp script; pretty print + +Pretty print result element +--------------------------- +.. program:: xp +.. option:: -p, --pretty-element + +A result element node can be pretty printed with the ``--pretty-element`` command-line option. + +.. warning:: The ``--pretty-element`` option removes all white space text nodes + *before* applying the XPath expression. Therefore there will be no white space + text nodes in the results. + +Pretty print the latest Python PEP: + +.. code-block:: bash + + curl -s https://peps.python.org/peps.rss | xp "//item[1]" -p + + .. index:: single: xp script; namespaces single: XML Namespaces @@ -174,43 +203,23 @@ Find Python PEPs with four digits in the title (case-insensitive): xp -e '//item/title[re:match(text(), "pep [0-9]{4}:", "i")]' -q -.. index:: - single: xp script; pretty print - -Pretty print element result ---------------------------- -.. program:: xp -.. option:: -p, --pretty-element - -A result element node can be pretty printed with the ``--pretty-element`` command-line option. - -.. warning:: The ``--pretty-element`` option removes all white space text nodes - *before* applying the XPath expression. Therefore there will be no white space - text nodes in the results. - -Pretty print the latest Python PEP: - -.. code-block:: bash - - curl -s https://peps.python.org/peps.rss | xp "//item[1]" -p - - .. index:: single: xp script; file names Print file names ---------------- .. program:: xp -.. option:: -f, -l, --files-with-hits +.. option:: -l, -f, --files-with-hits The ``--files-with-hits`` command-line option only prints the names of files with an XPath result that is not false and not NaN (not a number). +This is similar to ``grep --files-with-matches`` using XPath instead of regular expressions. Find XML files with HTTP URL's: .. code-block:: bash - xp "//mpeg7:MediaUri[starts-with(., 'http://')]" *.xml -f + xp "//mpeg7:MediaUri[starts-with(., 'http://')]" *.xml -l XML files where all the book prices are below € 25,-. @@ -219,16 +228,17 @@ XML files where all the book prices are below € 25,-. xp -el "math:max(//book/price[@currency='€'])<25" *.xml .. program:: xp -.. option:: -F, -L, --files-without-hits +.. option:: -L, -F, --files-without-hits The ``--files-without-hits`` command-line option only prints the names of files without any XPath results, or with a false or NaN result. +This is similar to ``grep --files-without-match`` using XPath instead of regular expressions. XML files without a person with the family name 'Bauwens': .. code-block:: bash - xp "//mpeg7:FamilyName[text()='Bauwens']" *.xml -F + xp "//mpeg7:FamilyName[text()='Bauwens']" *.xml -L xpath method ------------ diff --git a/src/xul/cmd/validate.py b/src/xul/cmd/validate.py index 32909bd..db3964d 100644 --- a/src/xul/cmd/validate.py +++ b/src/xul/cmd/validate.py @@ -1,4 +1,4 @@ -"""Validate XML source with XSD, DTD or RELAX NG.""" +"""Validate an XML source with XSD, DTD or RELAX NG.""" import argparse import sys @@ -7,6 +7,7 @@ from lxml import etree from .. import __version__ +from ..etree import get_source_name from ..log import setup_logger_console from ..validate import build_dtd, build_relaxng, build_xml_schema, validate_xml @@ -15,33 +16,39 @@ def parse_cl() -> argparse.Namespace: """Parse the command line for options and XML sources.""" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("-V", "--version", action="version", version="%(prog)s " + __version__) - lang_group = parser.add_mutually_exclusive_group(required=True) - lang_group.add_argument( + validator_args_group = parser.add_argument_group( + title="XML validator", description="choose an XML validator: XSD, DTD or RELAX NG" + ) + validator_group = validator_args_group.add_mutually_exclusive_group(required=True) + validator_group.add_argument( "-x", "--xsd", action="store", dest="xsd_source", help="XML Schema Definition (XSD) source" ) - lang_group.add_argument( + validator_group.add_argument( "-d", "--dtd", action="store", dest="dtd_source", help="Document Type Definition (DTD) source", ) - lang_group.add_argument( + validator_group.add_argument( "-r", "--relaxng", action="store", dest="relaxng_source", help="RELAX NG source" ) - file_group = parser.add_mutually_exclusive_group(required=False) - file_group.add_argument( - "-f", + file_group = parser.add_argument_group( + title="file hit options", description="output filenames to standard output" + ) + file_hit_group = file_group.add_mutually_exclusive_group(required=False) + file_hit_group.add_argument( "-l", + "-f", "--validated-files", action="store_true", default=False, dest="validated_files", help="only the names of validated XML files are written to standard output", ) - file_group.add_argument( - "-F", + file_hit_group.add_argument( "-L", + "-F", "--invalidated-files", action="store_true", default=False, @@ -71,11 +78,7 @@ def apply_validator( if args.validated_files or args.invalidated_files: valid = validate_xml(xml_source, validator, silent=True) if (valid and args.validated_files) or (not valid and args.invalidated_files): - if xml_source in ("-", sys.stdin): - # . - print(sys.stdin.name) - else: - print(xml_source) + print(get_source_name(xml_source)) else: validate_xml(xml_source, validator) diff --git a/src/xul/cmd/xp.py b/src/xul/cmd/xp.py index 4447ae8..4ff28ce 100644 --- a/src/xul/cmd/xp.py +++ b/src/xul/cmd/xp.py @@ -7,7 +7,7 @@ from lxml import etree from .. import __version__ -from ..etree import build_etree +from ..etree import build_etree, get_source_name from ..log import setup_logger_console from ..ppxml import prettyprint from ..xpath import build_xpath, etree_xpath, namespaces @@ -24,15 +24,32 @@ def parse_cl() -> argparse.Namespace: metavar="xml_source", help="XML source (file, , http://...)", ) - parser.add_argument( - "-e", - "--exslt", + file_group = parser.add_argument_group( + title="file hit options", description="output filenames to standard output" + ) + file_hit_group = file_group.add_mutually_exclusive_group(required=False) + file_hit_group.add_argument( + "-l", + "-f", + "--files-with-hits", action="store_true", default=False, - dest="exslt", - help="add EXSLT XML namespaces", + dest="files_with_hits", + help="only the names of files with a non-false and non-NaN result " + + "are written to standard output", ) - parser.add_argument( + file_hit_group.add_argument( + "-L", + "-F", + "--files-without-hits", + action="store_true", + default=False, + dest="files_without_hits", + help="only the names of files with a false or NaN result, " + + "or without any results are written to standard output", + ) + namespace_group = parser.add_argument_group(title="namespace options") + namespace_group.add_argument( "-d", "--default-prefix", action="store", @@ -40,7 +57,15 @@ def parse_cl() -> argparse.Namespace: dest="default_ns_prefix", help="set the prefix for the default namespace in XPath [default: '%(default)s']", ) - parser.add_argument( + namespace_group.add_argument( + "-e", + "--exslt", + action="store_true", + default=False, + dest="exslt", + help="add EXSLT XML namespaces", + ) + namespace_group.add_argument( "-q", "--quiet", action="store_false", @@ -48,7 +73,8 @@ def parse_cl() -> argparse.Namespace: dest="verbose", help="don't print XML source namespaces", ) - parser.add_argument( + output_group = parser.add_argument_group(title="output options") + output_group.add_argument( "-p", "--pretty-element", action="store_true", @@ -56,7 +82,7 @@ def parse_cl() -> argparse.Namespace: dest="pretty_element", help="pretty print the result element", ) - parser.add_argument( + output_group.add_argument( "-r", "--result-xpath", action="store_true", @@ -64,27 +90,6 @@ def parse_cl() -> argparse.Namespace: dest="result_xpath", help="print the XPath expression of the result element (or its parent)", ) - file_group = parser.add_mutually_exclusive_group(required=False) - file_group.add_argument( - "-f", - "-l", - "--files-with-hits", - action="store_true", - default=False, - dest="files_with_hits", - help="only the names of files with a non-false and non-NaN result " - + "are written to standard output", - ) - file_group.add_argument( - "-F", - "-L", - "--files-without-hits", - action="store_true", - default=False, - dest="files_without_hits", - help="only the names of files with a false or NaN result, " - + "or without any results are written to standard output", - ) parser.add_argument( "-m", "--method", @@ -426,10 +431,7 @@ def xpath_on_xml( return False # Printable name for sys.stdin. - if xml_source in ("-", sys.stdin): - source_name = sys.stdin.name - else: - source_name = xml_source + source_name = get_source_name(xml_source) # XML sources names (--files-with-results/--files-without-results). if args.files_with_hits or args.files_without_hits: