diff options
Diffstat (limited to 'util/testrunner/qt-testrunner.py')
| -rwxr-xr-x | util/testrunner/qt-testrunner.py | 155 |
1 files changed, 118 insertions, 37 deletions
diff --git a/util/testrunner/qt-testrunner.py b/util/testrunner/qt-testrunner.py index 41e81e83122..1573534cee9 100755 --- a/util/testrunner/qt-testrunner.py +++ b/util/testrunner/qt-testrunner.py @@ -4,10 +4,9 @@ # !!!IMPORTANT!!! If you change anything to this script, run the testsuite -# manually and make sure it still passes, as it doesn't run automatically. -# Just execute the command line as such: +# and make sure it still passes: # -# ./util/testrunner/tests/tst_testrunner.py -v [--debug] +# qtbase/tests/auto/util/testrunner/tst_qt_testrunner.py -v [--debug] # # ======== qt-testrunner ======== # @@ -15,24 +14,44 @@ # tst_whatever, and tries to iron out unpredictable test failures. # In particular: # -# + Appends output argument to it: "-o tst_whatever.xml,xml" -# + Checks the exit code. If it is zero, the script exits with zero, -# otherwise proceeds. -# + Reads the XML test log and Understands exactly which function -# of the test failed. -# + If no XML file is found or was invalid, the test executable -# probably CRASHed, so we *re-run the full test once again*. -# + If some testcases failed it executes only those individually -# until they pass, or until max-repeats times is reached. +# + Append output argument to it: "-o tst_whatever.xml,xml" and +# execute it. +# + Save the exit code. +# - If it is <0 or >=128 (see NOTE_2), mark the test run as CRASH. +# + Read the XML test log and find exactly which functions +# of the test FAILed. +# + Mark the test run as CRASH, if: +# - no XML file is found, +# - or an invalid XML file is found, +# - or the XML contains a QFatal message: <Message type="qfatal"> +# - or no test FAILures are listed in the XML but the saved +# exit code is not 0. +# + If, based on the rules above, the test run is marked as CRASH, +# then *re-run the full test once again* and start this logic over. +# If we are on the 2nd run and CRASH happens again, then exit(3). +# + Examine the saved exit code: +# if it is 0, then exit(0) (success, all tests have PASSed). +# + Otherwise, some testcases failed, so execute only those individually +# until they pass, or until max-repeats (default: 5) times is reached. # # The regular way to use is to set the environment variable TESTRUNNER to -# point to this script before invoking ctest. +# point to this script before invoking ctest. In COIN CI it is set as +# TESTRUNNER="qt-testrunner.py --" to stop it from parsing further args. # # NOTE: this script is crafted specifically for use with Qt tests and for # using it in Qt's CI. For example it detects and acts specially if test # executable is "tst_selftests" or "androidtestrunner". It also detects # env var "COIN_CTEST_RESULTSDIR" and uses it as log-dir. # +# NOTE_2: Why is qt-testrunner considering exit code outside [0,127] as CRASH? +# On Linux, Python subprocess module returns positive `returncode` +# (255 for example), even if the child does exit(-1 for example). It +# returns negative `returncode` only if the child is killed by a signal. +# Qt-testrunner wants to catch both of these cases as CRASH. +# On Windows, a crash is usually accompanied by exitcode >= 0xC0000000. +# Finally, QTest is limiting itself to exit codes in [0,127] +# so anything outside that range is abnormal, thus treated as CRASH. +# # TODO implement --dry-run. # Exit codes of this script: @@ -63,9 +82,17 @@ from pprint import pprint from typing import NamedTuple, Tuple, List, Optional # Define a custom type for returning a fail incident -class WhatFailed(NamedTuple): +class TestResult(NamedTuple): func: str tag: Optional[str] = None +class WhatFailed(NamedTuple): + qfatal_message: Optional[str] = None + failed_tests: List[TestResult] = [] + +class ReRunCrash(Exception): + pass +class BadXMLCrash(Exception): + pass # In the last test re-run, we add special verbosity arguments, in an attempt @@ -83,9 +110,11 @@ NO_RERUN_FUNCTIONS = { # not try to append "-o" to their command-line or re-run failed testcases. # Only add tests here if absolutely necessary! NON_XML_GENERATING_TESTS = { - "tst_selftests", # qtestlib's selftests are using an external test framework (Catch) that does not support -o argument - "tst_QDoc", # Some of QDoc's tests are using an external test framework (Catch) that does not support -o argument - "tst_QDoc_Catch_Generators", # Some of QDoc's tests are using an external test framework (Catch) that does not support -o argument + # These tests use an external test framework (Catch) that doesn't support + # QtTest's -o argument. + "tst_selftests", + "tst_QDoc", + "tst_QDoc_Catch_Generators", } # These are scripts that are used to wrap test execution for special platforms. # They need special handling (most times just skipping the wrapper name in argv[]). @@ -131,6 +160,9 @@ Default flags: --max-repeats 5 --passes-needed 1 " -o log_file.xml -v2 -vs. This will disable some functionality like the" " failed test repetition and the verbose output on failure. This is" " activated by default when TESTARGS is tst_selftests.") + # TODO parser.parse_args(args=sys.argv[0:cmd_index]). + # Where cmd_index is either the first positional argument, or the argument right after "--". + # This way it won't interpet arguments after the first positional arg. args = parser.parse_args() args.self_name = os.path.basename(sys.argv[0]) args.specific_extra_args = [] @@ -198,11 +230,13 @@ Default flags: --max-repeats 5 --passes-needed 1 return args -def parse_log(results_file) -> List[WhatFailed]: - """Parse the XML test log file. Return the failed testcases, if any. +def parse_log(results_file) -> WhatFailed: + """ + Parse the XML test log file. Return the failed testcases, if any, + and the first qfatal message possibly printed. Failures are considered the "fail" and "xpass" incidents. - A testcase is a function with an optional data tag.""" + """ start_timer = timeit.default_timer() try: @@ -222,10 +256,12 @@ def parse_log(results_file) -> List[WhatFailed]: root = tree.getroot() if root.tag != "TestCase": - raise AssertionError( + raise BadXMLCrash( f"The XML test log must have <TestCase> as root tag, but has: <{root.tag}>") failures = [] + qfatal_message = None + n_passes = 0 for e1 in root: if e1.tag == "TestFunction": @@ -233,23 +269,43 @@ def parse_log(results_file) -> List[WhatFailed]: if e2.tag == "Incident": if e2.attrib["type"] in ("fail", "xpass"): func = e1.attrib["name"] + datatag = None e3 = e2.find("DataTag") # every <Incident> might have a <DataTag> if e3 is not None: - failures.append(WhatFailed(func, tag=e3.text)) - else: - failures.append(WhatFailed(func)) + datatag = e3.text + failures.append(TestResult(func, datatag)) else: n_passes += 1 + # Use iter() here to _recursively_ search root for <Message>, + # as we don't trust that messages are always at the same depth. + for message_tag in root.iter(tag="Message"): + messagetype = message_tag.get("type") + if messagetype == "qfatal": + message_desc = message_tag.find("Description") + if message_desc is not None: + qfatal_message = message_desc.text + else: + qfatal_message = "--EMPTY QFATAL--" + L.warning("qFatal message ('%s') found in the XML, treating this run as a CRASH!", + qfatal_message) + break + end_timer = timeit.default_timer() t = end_timer - start_timer L.info(f"Parsed XML file {results_file} in {t:.3f} seconds") L.info(f"Found {n_passes} passes and {len(failures)} failures") - return failures + return WhatFailed(qfatal_message, failures) def run_test(arg_list: List[str], **kwargs): + if (os.environ.get("QT_TESTRUNNER_TESTING", "0") == "1" + and os.name == "nt" + and arg_list[0].endswith(".py") + ): + # For executing qt_mock_test.py under the same Python interpreter when testing. + arg_list = [ sys.executable ] + arg_list L.debug("Running test command line: %s", arg_list) proc = subprocess.run(arg_list, **kwargs) L.info("Test process exited with code: %d", proc.returncode) @@ -257,6 +313,11 @@ def run_test(arg_list: List[str], **kwargs): return proc def unique_filename(test_basename: str) -> str: + + # Hidden env var for testing, enforcing a predictable, non-unique filename. + if os.environ.get("QT_TESTRUNNER_DEBUG_NO_UNIQUE_OUTPUT_FILENAME"): + return f"{test_basename}" + timestamp = round(time.time() * 1000) return f"{test_basename}-{timestamp}" @@ -291,18 +352,19 @@ def run_full_test(test_basename, testargs: List[str], output_dir: str, def rerun_failed_testcase(test_basename, testargs: List[str], output_dir: str, - what_failed: WhatFailed, + testcase: TestResult, max_repeats, passes_needed, dryrun=False, timeout=None) -> bool: """Run a specific function:tag of a test, until it passes enough times, or until max_repeats is reached. Return True if it passes eventually, False if it fails. + Raise ReRunCrash Exception if it crashes. """ assert passes_needed <= max_repeats - failed_arg = what_failed.func - if what_failed.tag: - failed_arg += ":" + what_failed.tag + failed_arg = testcase.func + if testcase.tag: + failed_arg += ":" + testcase.tag n_passes = 0 @@ -325,6 +387,19 @@ def rerun_failed_testcase(test_basename, testargs: List[str], output_dir: str, proc = run_test(testargs + output_args + VERBOSE_ARGS + [failed_arg], timeout=timeout, env={**os.environ, **VERBOSE_ENV}) + # There are platforms that run tests wrapped with some test-runner + # script, that can possibly fail to extract a process exit code. + # Because of these cases, we *also* parse the XML file and signify + # CRASH in case of QFATAL/empty/corrupt result. + what_failed = parse_log(f"{pathname_stem}.xml") + if what_failed.qfatal_message: + raise ReRunCrash(f"CRASH! returncode:{proc.returncode} " + f"QFATAL:'{what_failed.qfatal_message}'") + if proc.returncode < 0 or proc.returncode >= 128: + raise ReRunCrash(f"CRASH! returncode:{proc.returncode}") + if proc.returncode == 0 and len(what_failed.failed_tests) > 0: + raise ReRunCrash("CRASH! returncode:0 but failures were found: " + + what_failed.failed_tests) if proc.returncode == 0: n_passes += 1 if n_passes == passes_needed: @@ -354,20 +429,22 @@ def main(): try: results_file = None - failed_functions = [] + what_failed = WhatFailed() if args.parse_xml_testlog: # do not run test, just parse file - failed_functions = parse_log(args.parse_xml_testlog) + what_failed = parse_log(args.parse_xml_testlog) # Pretend the test returned correct exit code - retcode = len(failed_functions) + retcode = len(what_failed.failed_tests) else: # normal invocation, run test (retcode, results_file) = \ run_full_test(args.test_basename, args.testargs, args.log_dir, args.no_extra_args, args.dry_run, args.timeout, args.specific_extra_args) if results_file: - failed_functions = parse_log(results_file) + what_failed = parse_log(results_file) + + failed_functions = what_failed.failed_tests - if retcode < 0: + if retcode < 0 or retcode >= 128 or what_failed.qfatal_message: L.warning("CRASH detected, re-running the whole executable") continue if retcode == 0: @@ -392,6 +469,8 @@ def main(): assert len(failed_functions) > 0 and retcode != 0 break # all is fine, goto re-running individual failed testcases + except AssertionError: + raise except Exception as e: L.error("exception:%s %s", type(e).__name__, e) L.error("The test executable probably crashed, see above for details") @@ -402,13 +481,15 @@ def main(): L.info("Some tests failed, will re-run at most %d times.\n", args.max_repeats) - for what_failed in failed_functions: + for test_result in failed_functions: try: ret = rerun_failed_testcase(args.test_basename, args.testargs, args.log_dir, - what_failed, args.max_repeats, args.passes_needed, + test_result, args.max_repeats, args.passes_needed, dryrun=args.dry_run, timeout=args.timeout) + except AssertionError: + raise except Exception as e: - L.error("exception:%s %s", type(e).__name__, e) + L.error("exception:%s", e) L.error("The testcase re-run probably crashed, giving up") sys.exit(3) # Test re-run CRASH |
