1 files changed, 118 insertions, 37 deletions
diff --git a/util/testrunner/qt-testrunner.py b/util/testrunner/qt-testrunner.py
index 41e81e83122..1573534cee9 100755
--- a/util/testrunner/qt-testrunner.py
+++ b/util/testrunner/qt-testrunner.py
@@ -4,10 +4,9 @@
 
 
 # !!!IMPORTANT!!!  If you change anything to this script, run the testsuite
-#    manually and make sure it still passes, as it doesn't run automatically.
-#    Just execute the command line as such:
+#   and make sure it still passes:
 #
-#      ./util/testrunner/tests/tst_testrunner.py -v [--debug]
+#       qtbase/tests/auto/util/testrunner/tst_qt_testrunner.py -v [--debug]
 #
 # ======== qt-testrunner ========
 #
@@ -15,24 +14,44 @@
 # tst_whatever, and tries to iron out unpredictable test failures.
 # In particular:
 #
-# + Appends output argument to it: "-o tst_whatever.xml,xml"
-# + Checks the exit code. If it is zero, the script exits with zero,
-#   otherwise proceeds.
-# + Reads the XML test log and Understands exactly which function
-#   of the test failed.
-#   + If no XML file is found or was invalid, the test executable
-#     probably CRASHed, so we *re-run the full test once again*.
-# + If some testcases failed it executes only those individually
-#   until they pass, or until max-repeats times is reached.
+# + Append output argument to it: "-o tst_whatever.xml,xml" and
+#   execute it.
+# + Save the exit code.
+#   - If it is <0 or >=128 (see NOTE_2), mark the test run as CRASH.
+# + Read the XML test log and find exactly which functions
+#   of the test FAILed.
+# + Mark the test run as CRASH, if:
+#   - no XML file is found,
+#   - or an invalid XML file is found,
+#   - or the XML contains a QFatal message: <Message type="qfatal">
+#   - or no test FAILures are listed in the XML but the saved
+#     exit code is not 0.
+# + If, based on the rules above, the test run is marked as CRASH,
+#   then *re-run the full test once again* and start this logic over.
+#   If we are on the 2nd run and CRASH happens again, then exit(3).
+# + Examine the saved exit code:
+#   if it is 0, then exit(0) (success, all tests have PASSed).
+# + Otherwise, some testcases failed, so execute only those individually
+#   until they pass, or until max-repeats (default: 5) attempts have been made.
 #
 # The regular way to use is to set the environment variable TESTRUNNER to
-# point to this script before invoking ctest.
+# point to this script before invoking ctest. In COIN CI it is set as
+# TESTRUNNER="qt-testrunner.py --" to stop it from parsing further args.
 #
 # NOTE: this script is crafted specifically for use with Qt tests and for
 #       using it in Qt's CI. For example it detects and acts specially if test
 #       executable is "tst_selftests" or "androidtestrunner".  It also detects
 #       env var "COIN_CTEST_RESULTSDIR" and uses it as log-dir.
 #
+# NOTE_2: Why does qt-testrunner treat exit codes outside [0,127] as CRASH?
+#         On Linux, the Python subprocess module reports a positive
+#         `returncode` (e.g. 255) even if the child calls exit(-1). It reports
+#         a negative `returncode` only if the child is killed by a signal.
+#         Qt-testrunner wants to catch both of these cases as CRASH.
+#         On Windows, a crash is usually accompanied by exitcode >= 0xC0000000.
+#         Finally, QTest limits itself to exit codes in [0,127],
+#         so anything outside that range is abnormal and thus treated as CRASH.
+#
 # TODO implement --dry-run.
 
 # Exit codes of this script:
@@ -63,9 +82,17 @@ from pprint import pprint
 from typing import NamedTuple, Tuple, List, Optional
 
 # Define a custom type for returning a fail incident
-class WhatFailed(NamedTuple):
+class TestResult(NamedTuple):
     func: str
     tag: Optional[str] = None
+class WhatFailed(NamedTuple):
+    qfatal_message: Optional[str]    = None
+    failed_tests:   List[TestResult] = []
+
+class ReRunCrash(Exception):
+    pass
+class BadXMLCrash(Exception):
+    pass
 
 
 # In the last test re-run, we add special verbosity arguments, in an attempt
@@ -83,9 +110,11 @@ NO_RERUN_FUNCTIONS = {
 # not try to append "-o" to their command-line or re-run failed testcases.
 # Only add tests here if absolutely necessary!
 NON_XML_GENERATING_TESTS = {
-    "tst_selftests",                # qtestlib's selftests are using an external test framework (Catch) that does not support -o argument
-    "tst_QDoc",                     # Some of QDoc's tests are using an external test framework (Catch) that does not support -o argument
-    "tst_QDoc_Catch_Generators",    # Some of QDoc's tests are using an external test framework (Catch) that does not support -o argument
+    # These tests use an external test framework (Catch) that doesn't support
+    # QtTest's -o argument.
+    "tst_selftests",
+    "tst_QDoc",
+    "tst_QDoc_Catch_Generators",
 }
 # These are scripts that are used to wrap test execution for special platforms.
 # They need special handling (most times just skipping the wrapper name in argv[]).
@@ -131,6 +160,9 @@ Default flags: --max-repeats 5 --passes-needed 1
                         " -o log_file.xml -v2 -vs. This will disable some functionality like the"
                         " failed test repetition and the verbose output on failure. This is"
                         " activated by default when TESTARGS is tst_selftests.")
+    # TODO parser.parse_args(args=sys.argv[0:cmd_index]).
+    #   Where cmd_index is either the first positional argument, or the argument right after "--".
+    #   This way it won't interpret arguments after the first positional arg.
     args = parser.parse_args()
     args.self_name = os.path.basename(sys.argv[0])
     args.specific_extra_args = []
@@ -198,11 +230,13 @@ Default flags: --max-repeats 5 --passes-needed 1
     return args
 
 
-def parse_log(results_file) -> List[WhatFailed]:
-    """Parse the XML test log file. Return the failed testcases, if any.
+def parse_log(results_file) -> WhatFailed:
+    """
+    Parse the XML test log file. Return the failed testcases, if any,
+    and the first qfatal message possibly printed.
 
     Failures are considered the "fail" and "xpass" incidents.
-    A testcase is a function with an optional data tag."""
+    """
     start_timer = timeit.default_timer()
 
     try:
@@ -222,10 +256,12 @@ def parse_log(results_file) -> List[WhatFailed]:
 
     root = tree.getroot()
     if root.tag != "TestCase":
-        raise AssertionError(
+        raise BadXMLCrash(
             f"The XML test log must have <TestCase> as root tag, but has: <{root.tag}>")
 
     failures = []
+    qfatal_message = None
+
     n_passes = 0
     for e1 in root:
         if e1.tag == "TestFunction":
@@ -233,23 +269,43 @@ def parse_log(results_file) -> List[WhatFailed]:
                 if e2.tag == "Incident":
                     if e2.attrib["type"] in ("fail", "xpass"):
                         func = e1.attrib["name"]
+                        datatag = None
                         e3 = e2.find("DataTag")    # every <Incident> might have a <DataTag>
                         if e3 is not None:
-                            failures.append(WhatFailed(func, tag=e3.text))
-                        else:
-                            failures.append(WhatFailed(func))
+                            datatag = e3.text
+                        failures.append(TestResult(func, datatag))
                     else:
                         n_passes += 1
 
+    # Search root _recursively_ (iter) for <Message>; depth is not guaranteed.
+    # A qfatal with a missing or empty <Description> still counts as a CRASH.
+    for message_tag in root.iter(tag="Message"):
+        messagetype = message_tag.get("type")
+        if messagetype == "qfatal":
+            message_desc = message_tag.find("Description")
+            if message_desc is not None and message_desc.text:
+                qfatal_message = message_desc.text
+            else:
+                qfatal_message = "--EMPTY QFATAL--"
+            L.warning("qFatal message ('%s') found in the XML, treating this run as a CRASH!",
+                      qfatal_message)
+            break
+
     end_timer = timeit.default_timer()
     t = end_timer - start_timer
     L.info(f"Parsed XML file {results_file} in {t:.3f} seconds")
     L.info(f"Found {n_passes} passes and {len(failures)} failures")
 
-    return failures
+    return WhatFailed(qfatal_message, failures)
 
 
 def run_test(arg_list: List[str], **kwargs):
+    if (os.environ.get("QT_TESTRUNNER_TESTING", "0") == "1"
+        and os.name == "nt"
+        and arg_list[0].endswith(".py")
+    ):
+        # For executing qt_mock_test.py under the same Python interpreter when testing.
+        arg_list = [ sys.executable ] + arg_list
     L.debug("Running test command line: %s", arg_list)
     proc = subprocess.run(arg_list, **kwargs)
     L.info("Test process exited with code: %d", proc.returncode)
@@ -257,6 +313,11 @@ def run_test(arg_list: List[str], **kwargs):
     return proc
 
 def unique_filename(test_basename: str) -> str:
+
+    # Hidden env var for testing, enforcing a predictable, non-unique filename.
+    if os.environ.get("QT_TESTRUNNER_DEBUG_NO_UNIQUE_OUTPUT_FILENAME"):
+        return f"{test_basename}"
+
     timestamp = round(time.time() * 1000)
     return f"{test_basename}-{timestamp}"
 
@@ -291,18 +352,19 @@ def run_full_test(test_basename, testargs: List[str], output_dir: str,
 
 
 def rerun_failed_testcase(test_basename, testargs: List[str], output_dir: str,
-                          what_failed: WhatFailed,
+                          testcase: TestResult,
                           max_repeats, passes_needed,
                           dryrun=False, timeout=None) -> bool:
     """Run a specific function:tag of a test, until it passes enough times, or
     until max_repeats is reached.
 
     Return True if it passes eventually, False if it fails.
+    Raise ReRunCrash Exception if it crashes.
     """
     assert passes_needed <= max_repeats
-    failed_arg = what_failed.func
-    if what_failed.tag:
-        failed_arg += ":" + what_failed.tag
+    failed_arg = testcase.func
+    if testcase.tag:
+        failed_arg += ":" + testcase.tag
 
 
     n_passes = 0
@@ -325,6 +387,19 @@ def rerun_failed_testcase(test_basename, testargs: List[str], output_dir: str,
             proc = run_test(testargs + output_args + VERBOSE_ARGS + [failed_arg],
                             timeout=timeout,
                             env={**os.environ, **VERBOSE_ENV})
+        # There are platforms that run tests wrapped with some test-runner
+        # script, that can possibly fail to extract a process exit code.
+        # Because of these cases, we *also* parse the XML file and signify
+        # CRASH in case of QFATAL/empty/corrupt result.
+        what_failed = parse_log(f"{pathname_stem}.xml")
+        if what_failed.qfatal_message:
+            raise ReRunCrash(f"CRASH! returncode:{proc.returncode} "
+                             f"QFATAL:'{what_failed.qfatal_message}'")
+        if proc.returncode < 0 or proc.returncode >= 128:
+            raise ReRunCrash(f"CRASH! returncode:{proc.returncode}")
+        if proc.returncode == 0 and len(what_failed.failed_tests) > 0:
+            raise ReRunCrash("CRASH! returncode:0 but failures were found: "
+                             f"{what_failed.failed_tests}")
         if proc.returncode == 0:
             n_passes += 1
         if n_passes == passes_needed:
@@ -354,20 +429,22 @@ def main():
 
         try:
             results_file = None
-            failed_functions = []
+            what_failed = WhatFailed()
             if args.parse_xml_testlog:      # do not run test, just parse file
-                failed_functions = parse_log(args.parse_xml_testlog)
+                what_failed = parse_log(args.parse_xml_testlog)
                 # Pretend the test returned correct exit code
-                retcode = len(failed_functions)
+                retcode = len(what_failed.failed_tests)
             else:                                # normal invocation, run test
                 (retcode, results_file) = \
                     run_full_test(args.test_basename, args.testargs, args.log_dir,
                                   args.no_extra_args, args.dry_run, args.timeout,
                                   args.specific_extra_args)
                 if results_file:
-                    failed_functions = parse_log(results_file)
+                    what_failed = parse_log(results_file)
+
+            failed_functions = what_failed.failed_tests
 
-            if retcode < 0:
+            if retcode < 0 or retcode >= 128 or what_failed.qfatal_message:
                 L.warning("CRASH detected, re-running the whole executable")
                 continue
             if retcode == 0:
@@ -392,6 +469,8 @@ def main():
             assert len(failed_functions) > 0  and  retcode != 0
             break    # all is fine, goto re-running individual failed testcases
 
+        except AssertionError:
+            raise
         except Exception as e:
             L.error("exception:%s %s", type(e).__name__, e)
             L.error("The test executable probably crashed, see above for details")
@@ -402,13 +481,15 @@ def main():
     L.info("Some tests failed, will re-run at most %d times.\n",
            args.max_repeats)
 
-    for what_failed in failed_functions:
+    for test_result in failed_functions:
         try:
             ret = rerun_failed_testcase(args.test_basename, args.testargs, args.log_dir,
-                                        what_failed, args.max_repeats, args.passes_needed,
+                                        test_result, args.max_repeats, args.passes_needed,
                                         dryrun=args.dry_run, timeout=args.timeout)
+        except AssertionError:
+            raise
         except Exception as e:
-            L.error("exception:%s %s", type(e).__name__, e)
+            L.error("exception:%s", e)
             L.error("The testcase re-run probably crashed, giving up")
             sys.exit(3)                                    # Test re-run CRASH