0

exception-db: Add one more retry for the exception record upload.

The code used to only try two times, and report the upload failure in
the last try. This CL increases the try times to 3 so that it will try
to clear the stacktrace and upload in the 2nd try. This will help
preserve more exception data records.

Also add unittest for the exception data upload.

Bug: 341362003
Change-Id: I3d924be3ee072bd5d49c44527e1a1d6648387e79
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5953815
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Commit-Queue: Haiyang Pan <hypan@google.com>
Cr-Commit-Position: refs/heads/main@{#1374101}
This commit is contained in:
Haiyang Pan
2024-10-25 20:03:58 +00:00
committed by Chromium LUCI CQ
parent ae0c8e3d4b
commit e33028c0df
4 changed files with 108 additions and 30 deletions

@ -7,3 +7,4 @@ wnwen@chromium.org
per-file generate_vscode_classpath.py=edechamps@google.com
per-file test_runner.py=hypan@google.com
per-file test_runner_test.py=hypan@google.com

@ -1034,6 +1034,38 @@ _SUPPORTED_IN_PLATFORM_MODE = [
]
def UploadExceptions(result_sink_client, exc_recorder):
if not result_sink_client or not exc_recorder.size():
return
try_count_max = 3
for try_count in range(1, try_count_max + 1):
logging.info('Uploading exception records to RDB. (TRY %d/%d)', try_count,
try_count_max)
try:
record_dict = exc_recorder.to_dict()
result_sink_client.UpdateInvocationExtendedProperties(
{exc_recorder.EXCEPTION_OCCURRENCES_KEY: record_dict})
exc_recorder.clear()
break
except Exception as e: # pylint: disable=W0703
logging.error("Got error %s when uploading exception records.", e)
# Upload can fail due to record size being too big.
# In this case, let's try to reduce the size.
if try_count == try_count_max - 2:
# Clear all the stackstrace to reduce size.
exc_recorder.clear_stacktrace()
elif try_count == try_count_max - 1:
# Clear all the records and just report the upload failure.
exc_recorder.clear()
exc_recorder.register(e)
elif try_count == try_count_max:
# Swallow the exception if the upload fails again and hit the max
# try so that it won't fail the test task (and it shouldn't).
exc_recorder.clear()
logging.error("Hit max retry. Skip uploading exception records.")
def RunTestsInPlatformMode(args, result_sink_client=None):
def infra_error(message):
@ -1163,39 +1195,10 @@ def RunTestsInPlatformMode(args, result_sink_client=None):
@contextlib.contextmanager
def exceptions_uploader():
def _upload_exceptions():
if not result_sink_client or not exception_recorder.size():
return
try_count = 0
try_count_max = 2
while try_count < try_count_max:
try_count += 1
logging.info('Uploading exception records to RDB. (TRY %d/%d)',
try_count, try_count_max)
try:
record_dict = exception_recorder.to_dict()
exception_recorder.clear()
result_sink_client.UpdateInvocationExtendedProperties(
{exception_recorder.EXCEPTION_OCCURRENCES_KEY: record_dict})
break
except Exception as e: # pylint: disable=W0703
logging.error("Got error %s when uploading exception records:\n%r", e,
record_dict)
if try_count < try_count_max:
# Upload can fail due to record size being too big. In this case,
# report just the upload failure.
exception_recorder.register(e)
else:
# Swallow the exception if the upload fails again and hit the max
# try so that it won't fail the test task (and it shouldn't).
logging.error("Hit max retry. Skip uploading exception records.")
try:
yield
finally:
_upload_exceptions()
UploadExceptions(result_sink_client, exception_recorder)
### Set up test objects.

@ -0,0 +1,64 @@
#!/usr/bin/env vpython3
#
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
from unittest import mock
import test_runner
class UploadExceptionTest(unittest.TestCase):
def setUp(self):
self.sink_client = mock.MagicMock()
self.exc_recorder = mock.MagicMock()
def testNoExceptions(self):
self.exc_recorder.size.return_value = 0
test_runner.UploadExceptions(self.sink_client, self.exc_recorder)
self.exc_recorder.to_dict.assert_not_called()
self.sink_client.UpdateInvocationExtendedProperties.assert_not_called()
def testUploadSuccess(self):
test_runner.UploadExceptions(self.sink_client, self.exc_recorder)
self.exc_recorder.to_dict.assert_called_once()
self.sink_client.UpdateInvocationExtendedProperties.assert_called_once()
self.exc_recorder.clear.assert_called_once()
def testUploadSuccessWithClearStacktrace(self):
self.sink_client.UpdateInvocationExtendedProperties.side_effect = [
Exception("Error 1"), None
]
test_runner.UploadExceptions(self.sink_client, self.exc_recorder)
self.assertEqual(self.exc_recorder.to_dict.call_count, 2)
self.assertEqual(
self.sink_client.UpdateInvocationExtendedProperties.call_count, 2)
self.exc_recorder.clear_stacktrace.assert_called_once()
self.exc_recorder.clear.assert_called_once()
def testUploadSuccessWithClearRecords(self):
self.sink_client.UpdateInvocationExtendedProperties.side_effect = [
Exception("Error 1"), Exception("Error 2"), None
]
test_runner.UploadExceptions(self.sink_client, self.exc_recorder)
self.assertEqual(self.exc_recorder.to_dict.call_count, 3)
self.assertEqual(
self.sink_client.UpdateInvocationExtendedProperties.call_count, 3)
self.exc_recorder.clear_stacktrace.assert_called_once()
self.assertEqual(self.exc_recorder.clear.call_count, 2)
self.exc_recorder.register.assert_called_once()
def testUploadFailure(self):
self.sink_client.UpdateInvocationExtendedProperties.side_effect = (
Exception("Error"))
test_runner.UploadExceptions(self.sink_client, self.exc_recorder)
self.assertEqual(self.exc_recorder.to_dict.call_count, 3)
self.assertEqual(
self.sink_client.UpdateInvocationExtendedProperties.call_count, 3)
self.assertEqual(self.exc_recorder.clear.call_count, 2)
self.exc_recorder.clear_stacktrace.assert_called_once()
self.exc_recorder.register.assert_called_once()

@ -77,6 +77,16 @@ def clear() -> None:
_records.clear()
def clear_stacktrace() -> None:
"""Clear the stacktrace from all the records while keeping the records.
This can be called to reduce the size of the overall records and avoid
the size issue when uploaded to other services, e.g. RDB.
"""
for record in _records:
record.ClearField('stacktrace')
def to_dict() -> dict:
"""Convert all the registered ExceptionOccurrence records to an dict.