Troubleshoot blackbox crash test final verification hang (#13070)
Summary:
Add a timeout for the blackbox crash test final verification step, and print
the db_stress stack trace on a timeout. The crash test occasionally hangs in
the verification step and this will help debug.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/13070

Reviewed By: hx235

Differential Revision: D64414461

Pulled By: anand1976

fbshipit-source-id: 4629aac01fbe6c788665beddc66280ba446aadbe
commit 2abbb02d14
parent cbebbad7d9
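The mechanism is straightforward: run db_stress under communicate() with a timeout, and when the timeout expires, dump the process's stack before killing it. A minimal standalone sketch of that pattern (a hypothetical run_with_timeout helper, not the db_crashtest.py code itself; assumes the pstack utility is installed):

import os
import subprocess


def run_with_timeout(cmd, timeout_secs, dump_stack_on_timeout=False):
    # Hypothetical helper illustrating the timeout + pstack pattern used below.
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    try:
        outs, errs = child.communicate(timeout=timeout_secs)
        hit_timeout = False
    except subprocess.TimeoutExpired:
        hit_timeout = True
        if dump_stack_on_timeout:
            # pstack prints the stack of every thread in the target process,
            # which is what makes a hung verification step debuggable.
            os.system("pstack %d" % child.pid)
        child.kill()
        outs, errs = child.communicate()
    return hit_timeout, child.returncode, outs, errs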
@@ -443,6 +443,8 @@ blackbox_default_params = {
     "duration": 6000,
     # time for one db_stress instance to run
     "interval": 120,
+    # time for the final verification step
+    "verify_timeout": 1200,
     # since we will be killing anyway, use large value for ops_per_thread
     "ops_per_thread": 100000000,
     "reopen": 0,
@@ -1047,6 +1049,7 @@ def gen_cmd(params, unknown_params):
                 "cleanup_cmd",
                 "skip_tmpdir_check",
                 "print_stderr_separately",
+                "verify_timeout",
             }
             and v is not None
         ]
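For context, gen_cmd turns cmd_params into db_stress command-line flags and skips the keys in this set, which are consumed by the Python harness itself; adding "verify_timeout" here keeps it off the db_stress command line. A simplified sketch of that filtering (not the exact gen_cmd code; the set below is abbreviated):

HARNESS_ONLY_PARAMS = {
    "cleanup_cmd",
    "skip_tmpdir_check",
    "print_stderr_separately",
    "verify_timeout",
}


def build_db_stress_flags(params):
    # Forward only real db_stress options; harness-only keys and unset values
    # are dropped rather than passed as --flag=value.
    return [
        "--{0}={1}".format(k, v)
        for k, v in sorted(params.items())
        if k not in HARNESS_ONLY_PARAMS and v is not None
    ]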
@@ -1055,9 +1058,10 @@ def gen_cmd(params, unknown_params):
     return cmd


-def execute_cmd(cmd, timeout=None):
+def execute_cmd(cmd, timeout=None, timeout_pstack=False):
     child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
     print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd)))
+    pid = child.pid

     try:
         outs, errs = child.communicate(timeout=timeout)
@@ -1065,6 +1069,8 @@ def execute_cmd(cmd, timeout=None):
         print("WARNING: db_stress ended before kill: exitcode=%d\n" % child.returncode)
     except subprocess.TimeoutExpired:
         hit_timeout = True
+        if timeout_pstack:
+            os.system("pstack %d" % pid)
         child.kill()
         print("KILLED %d\n" % child.pid)
         outs, errs = child.communicate()
@@ -1139,7 +1145,7 @@ def blackbox_crash_main(args, unknown_args):
     cmd = gen_cmd(
         dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
     )
-    hit_timeout, retcode, outs, errs = execute_cmd(cmd)
+    hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params["verify_timeout"], True)

     # For the final run
     print_output_and_exit_on_error(outs, errs, args.print_stderr_separately)
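Taken together, only the final verification run passes cmd_params["verify_timeout"] (default 1200 seconds) and timeout_pstack=True into execute_cmd; the other call sites leave timeout_pstack at its False default, so the periodic kill-and-restart runs behave as before while a hung verification now times out and leaves a stack trace in the log.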