Improve performance and output of entvalidate.py

The most notable changes are:

- improved performance thanks to using `lxml` instead of `xmllint` for
  validating the templates
- improved performance by parallelizing the validation across multiple
  threads
- more meaningful output for detected validation errors
- easier to call from other scripts (like checkrefs.py)
This commit is contained in:
Dunedan 2025-03-18 12:57:01 +01:00
parent f68a8c2532
commit e755ef7155
No known key found for this signature in database
GPG key ID: 885B16854284E0B2
2 changed files with 118 additions and 90 deletions

View file

@ -1,56 +1,116 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import logging
import shutil
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from subprocess import CalledProcessError, run
from xml.etree import ElementTree as ET
from lxml import etree
from lxml.etree import DocumentInvalid, ElementTree
from scriptlib import SimulTemplateEntity, find_files
SIMUL_TEMPLATES_PATH = Path("simulation/templates")
ENTITY_RELAXNG_FNAME = "entity.rng"
RELAXNG_SCHEMA_ERROR_MSG = """Relax NG schema non existant.
Please create the file: {}
You can do that by running 'pyrogenesis -dumpSchema' in the 'system' directory
"""
XMLLINT_ERROR_MSG = (
"xmllint not found in your PATH, please install it (usually in libxml2 package)"
)
class SingleLevelFilter(logging.Filter):
def __init__(self, passlevel, reject):
self.passlevel = passlevel
self.reject = reject
def filter(self, record):
if self.reject:
return record.levelno != self.passlevel
return record.levelno == self.passlevel
def init_logger(log_level) -> logging.Logger:
"""Initialize a logger."""
logger = logging.getLogger(__name__)
logger.setLevel(log_level)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(log_level)
handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
logger.addHandler(handler)
return logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# create a console handler, seems nicer to Windows and for future uses
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.INFO)
ch.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
f1 = SingleLevelFilter(logging.INFO, False)
ch.addFilter(f1)
logger.addHandler(ch)
errorch = logging.StreamHandler(sys.stderr)
errorch.setLevel(logging.WARNING)
errorch.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
logger.addHandler(errorch)
def validate_template(
simulation_template_entity: SimulTemplateEntity,
template_path: Path,
relaxng_schema: ElementTree,
mod_name: str,
) -> None:
"""Validate a single template."""
entity = simulation_template_entity.load_inherited(
SIMUL_TEMPLATES_PATH, str(template_path.relative_to(SIMUL_TEMPLATES_PATH)), [mod_name]
)
relaxng_schema.assertValid(etree.fromstring(ET.tostring(entity, encoding="utf-8")))
def main() -> int:
parser = argparse.ArgumentParser(description="Validate templates")
class ValidationError(Exception):
pass
def validate_templates(
logger: logging.Logger,
vfs_root: Path,
mod_name: str,
relaxng_schema_path: Path,
templates: list[Path] | None,
) -> None:
"""Validate templates against the given RELAX NG schema."""
if templates:
templates = [(template, None) for template in templates]
else:
templates = find_files(vfs_root, [mod_name], SIMUL_TEMPLATES_PATH.as_posix(), "xml")
templates_to_validate = []
for fp, _ in templates:
if fp.stem.startswith("template_"):
continue
template_path = fp.as_posix()
if template_path.startswith(
(
f"{SIMUL_TEMPLATES_PATH.as_posix()}/mixins/",
f"{SIMUL_TEMPLATES_PATH.as_posix()}/special/",
)
):
continue
templates_to_validate.append(fp)
simulation_template_entity = SimulTemplateEntity(vfs_root, logger)
relaxng_schema = etree.RelaxNG(file=relaxng_schema_path)
count, failed = 0, 0
with ThreadPoolExecutor() as executor:
futures = {}
for template_path in templates_to_validate:
future = executor.submit(
validate_template,
simulation_template_entity,
template_path,
relaxng_schema,
mod_name,
)
futures[future] = template_path
for future in as_completed(futures):
count += 1
template_path = futures[future]
logger.debug("Processed %s", template_path)
try:
future.result()
except DocumentInvalid as e:
failed += 1
logger.error("%s: %s", template_path, e) # noqa: TRY400
logger.info("Total: %s; failed: %s", count, failed)
if failed:
raise ValidationError
def main() -> None:
parser = argparse.ArgumentParser(description="Validate templates against a RELAX NG schema.")
parser.add_argument("-m", "--mod-name", required=True, help="The name of the mod to validate.")
parser.add_argument(
"-r",
@ -65,72 +125,39 @@ def main() -> int:
"--relaxng-schema",
default=Path() / ENTITY_RELAXNG_FNAME,
type=Path,
help="The path to mod's root location.",
help="The path to the RELAX NG schema.",
)
parser.add_argument(
"-t", "--templates", nargs="*", help="Optionally, a list of templates to validate."
"-t",
"--templates",
nargs="*",
type=Path,
help="A list of templates to validate. If omitted all templates will be validated.",
)
parser.add_argument(
"-v", "--verbose", help="Be verbose about the output.", action="store_true"
)
parser.add_argument("-v", "--verbose", help="Be verbose about the output.", default=False)
args = parser.parse_args()
log_level = logging.DEBUG if args.verbose else logging.INFO
logger = init_logger(log_level)
if not args.relaxng_schema.exists():
logging.error(RELAXNG_SCHEMA_ERROR_MSG.format(args.relaxng_schema))
return 1
if not shutil.which("xmllint"):
logging.error(XMLLINT_ERROR_MSG)
return 2
if args.templates:
templates = sorted([(Path(t), None) for t in args.templates])
else:
templates = sorted(
find_files(args.vfs_root, [args.mod_name], SIMUL_TEMPLATES_PATH.as_posix(), "xml")
logging.error(
"RELAX NG schema file doesn't exist. Please create the file: %s. You can do that by "
'running "pyrogenesis -dumpSchema" in the "binaries/system" directory',
args.relaxng_schema,
)
sys.exit(1)
simul_template_entity = SimulTemplateEntity(args.vfs_root, logger)
count, failed = 0, 0
for fp, _ in templates:
if fp.stem.startswith("template_"):
continue
path = fp.as_posix()
if path.startswith(
(
f"{SIMUL_TEMPLATES_PATH.as_posix()}/mixins/",
f"{SIMUL_TEMPLATES_PATH.as_posix()}/special/",
)
):
continue
if args.verbose:
logger.info("Parsing %s...", fp)
count += 1
entity = simul_template_entity.load_inherited(
SIMUL_TEMPLATES_PATH, str(fp.relative_to(SIMUL_TEMPLATES_PATH)), [args.mod_name]
try:
validate_templates(
logger, args.vfs_root, args.mod_name, args.relaxng_schema, args.templates
)
xmlcontent = ET.tostring(entity, encoding="unicode")
try:
run(
["xmllint", "--relaxng", str(args.relaxng_schema.resolve()), "-"],
input=xmlcontent,
encoding="utf-8",
capture_output=True,
text=True,
check=True,
)
except CalledProcessError as e:
failed += 1
if e.stderr:
logger.exception(e.stderr)
if e.stdout:
logger.info(e.stdout)
logger.info("Total: %s; failed: %s", count, failed)
return 0
except ValidationError:
sys.exit(1)
if __name__ == "__main__":
raise SystemExit(main())
main()

View file

@ -0,0 +1 @@
lxml>=5.0.0