Skip to content

Commit 72029cf

Browse files
committed
hrw4u: Allows bulk compilation of many files (faster)
1 parent 9b7734e commit 72029cf

5 files changed

Lines changed: 239 additions & 40 deletions

File tree

doc/admin-guide/configuration/hrw4u.en.rst

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,56 @@ follows to produce the help output:
8080
8181
hrw4u --help
8282
83-
Doing a compile is simply:
83+
Basic Usage
84+
^^^^^^^^^^^
85+
86+
Compile a single file to stdout:
8487

8588
.. code-block:: none
8689
8790
hrw4u some_file.hrw4u
8891
89-
in Addition to ``hrw4u``, you also have the reverse tool, converting existing ``header_rewrite``
90-
configurations to ``hrw4u``. This tool is named ``u4wrh``. For people using IDEs, the package also
91-
provides an LSP for this language, named ``hrw4u-lsp``.
92+
Compile from stdin:
93+
94+
.. code-block:: none
95+
96+
cat some_file.hrw4u | hrw4u
97+
98+
Compile multiple files to stdout (separated by ``# ---``):
99+
100+
.. code-block:: none
101+
102+
hrw4u file1.hrw4u file2.hrw4u file3.hrw4u
103+
104+
Bulk Compilation
105+
^^^^^^^^^^^^^^^^
106+
107+
For bulk compilation, use the ``input:output`` format to compile multiple files
108+
to their respective output files in a single command:
109+
110+
.. code-block:: none
111+
112+
hrw4u file1.hrw4u:file1.conf file2.hrw4u:file2.conf file3.hrw4u:file3.conf
113+
114+
This is particularly useful for build systems or when processing many configuration
115+
files at once. All files are processed in a single invocation, improving performance
116+
for large batches of files. Plain file names and ``input:output`` pairs cannot be mixed in the same command.
117+
118+
Reverse Tool (u4wrh)
119+
^^^^^^^^^^^^^^^^^^^^
120+
121+
In addition to ``hrw4u``, there is also a reverse tool, which converts existing ``header_rewrite``
122+
configurations to ``hrw4u``. This tool is named ``u4wrh`` and supports the same usage patterns:
123+
124+
.. code-block:: none
125+
126+
# Convert single file to stdout
127+
u4wrh existing_config.conf
128+
129+
# Bulk conversion
130+
u4wrh file1.conf:file1.hrw4u file2.conf:file2.hrw4u
131+
132+
For people using IDEs, the package also provides an LSP for this language, named ``hrw4u-lsp``.
92133

93134
Syntax Differences
94135
==================

tools/hrw4u/scripts/hrw4u

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,19 @@ from __future__ import annotations
2222
from hrw4u.hrw4uLexer import hrw4uLexer
2323
from hrw4u.hrw4uParser import hrw4uParser
2424
from hrw4u.visitor import HRW4UVisitor
25-
from hrw4u.common import create_base_parser, create_parse_tree, generate_output, process_input
25+
from hrw4u.common import run_main
2626

2727

2828
def main() -> None:
    """Entry point for the hrw4u command-line script.

    All argument parsing and processing is delegated to ``run_main``; this
    function only supplies the hrw4u-specific lexer, parser and visitor
    classes together with the name and help text of the output flag.
    """
    settings = {
        "description": "Process HRW4U input and produce output (AST or HRW).",
        "lexer_class": hrw4uLexer,
        "parser_class": hrw4uParser,
        "visitor_class": HRW4UVisitor,
        "error_prefix": "hrw4u",
        "output_flag_name": "hrw",
        "output_flag_help": "Produce the HRW output (default)",
    }
    run_main(**settings)
4738

4839

4940
if __name__ == "__main__":

tools/hrw4u/scripts/u4wrh

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,31 +19,22 @@
1919

2020
from __future__ import annotations
2121

22-
from hrw4u.common import create_base_parser, create_parse_tree, generate_output, process_input
22+
from hrw4u.common import run_main
2323
from u4wrh.hrw_visitor import HRWInverseVisitor
2424
from u4wrh.u4wrhLexer import u4wrhLexer
2525
from u4wrh.u4wrhParser import u4wrhParser
2626

2727

2828
def main() -> None:
    """Entry point for the u4wrh command-line script.

    All argument parsing and processing is delegated to ``run_main``; this
    function only supplies the u4wrh-specific lexer, parser and visitor
    classes together with the name and help text of the output flag.
    """
    settings = {
        "description": "Process header_rewrite (HRW) lines and reconstruct hrw4u source.",
        "lexer_class": u4wrhLexer,
        "parser_class": u4wrhParser,
        "visitor_class": HRWInverseVisitor,
        "error_prefix": "u4wrh",
        "output_flag_name": "hrw4u",
        "output_flag_help": "Produce reconstructed hrw4u output (default)",
    }
    run_main(**settings)
4738

4839

4940
if __name__ == "__main__":

tools/hrw4u/src/common.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,110 @@ def generate_output(
229229
print(error_collector.get_error_summary(), file=sys.stderr)
230230
if not args.ast and tree is None:
231231
sys.exit(1)
232+
233+
234+
def run_main(
        description: str, lexer_class: type[LexerProtocol], parser_class: type[ParserProtocol],
        visitor_class: type[VisitorProtocol], error_prefix: str, output_flag_name: str, output_flag_help: str) -> None:
    """
    Generic main function for hrw4u and u4wrh scripts with bulk compilation support.

    The positional ``files`` arguments select one of three modes:

    * no files: read from stdin and write the result to stdout;
    * plain paths: process each file in order and write every result to
      stdout, separated by a ``# ---`` line;
    * ``input:output`` pairs: process each input and write its result to
      the paired output file.

    Plain paths and ``input:output`` pairs cannot be mixed. The argument
    list is validated before any file is touched, so a rejected command
    line never leaves partially written outputs behind.

    Args:
        description: Description for argument parser
        lexer_class: ANTLR lexer class to use
        parser_class: ANTLR parser class to use
        visitor_class: Visitor class to use
        error_prefix: Error prefix for error messages
        output_flag_name: Name of output flag (e.g., "hrw", "hrw4u")
        output_flag_help: Help text for output flag

    Raises:
        SystemExit: With status 1 when the argument formats are mixed, or
            when an input cannot be read or an output cannot be written.
    """
    import argparse
    from contextlib import redirect_stdout

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="For bulk compilation to files, use: input1.txt:output1.txt input2.txt:output2.txt ...")

    parser.add_argument(
        "files", help="Input file(s) to parse. Use input:output for bulk file output (default: stdin to stdout)", nargs="*")

    output_group = parser.add_mutually_exclusive_group()
    output_group.add_argument("--ast", action="store_true", help="Produce the ANTLR parse tree only")
    output_group.add_argument(f"--{output_flag_name}", action="store_true", help=output_flag_help)

    parser.add_argument("--no-comments", action="store_true", help="Skip comment preservation (ignore comments in output)")
    parser.add_argument("--debug", action="store_true", help="Enable debug output")
    parser.add_argument(
        "--stop-on-error", action="store_true", help="Stop processing on first error (default: collect and report multiple errors)")

    args = parser.parse_args()

    if not hasattr(args, output_flag_name):
        setattr(args, output_flag_name, False)

    # The tool-specific output is the default when --ast was not requested.
    if not (args.ast or getattr(args, output_flag_name)):
        setattr(args, output_flag_name, True)

    def parse(content: str, filename: str):
        """Build the parse tree for one input using the configured lexer and parser."""
        return create_parse_tree(content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error)

    if not args.files:
        content, filename = process_input(sys.stdin)
        tree, parser_obj, error_collector = parse(content, filename)
        generate_output(tree, parser_obj, visitor_class, filename, args, error_collector)
        return

    if any(':' in entry for entry in args.files):
        # Reject mixed formats up front, before any output file is created.
        if not all(':' in entry for entry in args.files):
            print(
                "Error: Mixed formats not allowed. All files must use 'input:output' format for bulk compilation.",
                file=sys.stderr)
            sys.exit(1)

        for pair in args.files:
            input_path, output_path = pair.split(':', 1)
            content = _read_source_file(input_path)
            tree, parser_obj, error_collector = parse(content, input_path)

            try:
                # redirect_stdout restores sys.stdout on every exit path,
                # including a SystemExit raised by generate_output.
                with open(output_path, 'w', encoding='utf-8') as output_file, redirect_stdout(output_file):
                    generate_output(tree, parser_obj, visitor_class, input_path, args, error_collector)
            except (OSError, UnicodeError) as e:
                print(f"Error writing to '{output_path}': {e}", file=sys.stderr)
                sys.exit(1)
        return

    for i, input_path in enumerate(args.files):
        if i > 0:
            print("\n# ---")

        content = _read_source_file(input_path)
        tree, parser_obj, error_collector = parse(content, input_path)
        generate_output(tree, parser_obj, visitor_class, input_path, args, error_collector)


def _read_source_file(input_path: str) -> str:
    """Return the UTF-8 text of ``input_path``; on failure report it on stderr and exit with status 1."""
    try:
        with open(input_path, 'r', encoding='utf-8') as input_file:
            return input_file.read()
    except FileNotFoundError:
        print(f"Error: Input file '{input_path}' not found", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeError) as e:
        print(f"Error reading '{input_path}': {e}", file=sys.stderr)
        sys.exit(1)

tools/hrw4u/tests/utils.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from __future__ import annotations
1818

1919
import re
20+
import subprocess
21+
import tempfile
2022
from pathlib import Path
2123
from typing import Final, Iterator
2224

@@ -52,6 +54,7 @@ def __init__(self, filename: str, line: int, column: int, message: str, source_l
5254
"run_ast_test",
5355
"run_failing_test",
5456
"run_reverse_test",
57+
"run_bulk_test",
5558
]
5659

5760

@@ -315,3 +318,69 @@ def test_reverse_conversion(input_file: Path, output_file: Path) -> None:
315318
run_reverse_test(input_file, output_file)
316319

317320
return test_reverse_conversion
321+
322+
323+
def run_bulk_test(group: str) -> None:
    """
    Run bulk compilation test for a specific test group.

    Collects all .input.txt files in the group, runs hrw4u with bulk
    input:output pairs, and compares each output with expected .output.txt.

    Inputs are left out of the run when their name contains ``.fail.``,
    when the group's exceptions map them to a direction other than
    ``"hrw4u"``, or when no expected output file exists. The test is
    skipped if nothing remains.

    Args:
        group: Name of the directory under ``tests/data`` to test.
    """
    import sys

    base_dir = Path("tests/data") / group
    exceptions = _read_exceptions(base_dir)

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)

        # One (input, expected output, actual output) triple per test case.
        cases: list[tuple[Path, Path, Path]] = []

        for input_file in sorted(base_dir.glob("*.input.txt")):
            if ".fail." in input_file.name:
                continue

            base = input_file.with_suffix('')
            expected_output_file = base.with_suffix('.output.txt')
            test_id = base.name

            if test_id in exceptions and exceptions[test_id] != "hrw4u":
                continue

            if not expected_output_file.exists():
                continue

            actual_output_file = tmp_path / f"{input_file.stem}.output.txt"
            cases.append((input_file, expected_output_file, actual_output_file))

        if not cases:
            # pytest.skip raises, so nothing below runs for an empty group.
            pytest.skip(f"No valid test files found for bulk test in {group}")

        hrw4u_script = Path("scripts/hrw4u").resolve()
        file_pairs = [f"{source.resolve()}:{actual.resolve()}" for source, _, actual in cases]

        # Use the interpreter running the tests rather than whichever
        # "python3" happens to be first on PATH.
        cmd = [sys.executable, str(hrw4u_script)] + file_pairs

        result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path.cwd())

        if result.returncode != 0:
            pytest.fail(f"hrw4u bulk compilation failed:\nstdout: {result.stdout}\nstderr: {result.stderr}")

        for input_file, expected_output_file, actual_output_file in cases:
            if not actual_output_file.exists():
                pytest.fail(f"Output file not created for {input_file.name}: {actual_output_file}")

            # The tool writes its outputs as UTF-8, so read them the same
            # way instead of relying on the locale default.
            actual_output = actual_output_file.read_text(encoding="utf-8").strip()
            expected_output = expected_output_file.read_text(encoding="utf-8").strip()

            assert actual_output == expected_output, (
                f"Bulk output mismatch for {input_file.name}\n"
                f"Expected:\n{expected_output}\n\n"
                f"Actual:\n{actual_output}")

0 commit comments

Comments
 (0)