Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
* <li>On a write timeout, retries once on the same host if we timeout while writing the
* distributed log used by batch statements.
* <li>On an unavailable exception, retries once on the next host.
* <li>On a request error, such as a client timeout, the query is retried on the next host. Do not
* retry on read or write failures.
* <li>On a request error, such as a client timeout, retries once on the next host. Do not retry
* on read or write failures.
* </ul>
*
* <p>This retry policy is conservative in that it will never retry with a different consistency
Expand Down Expand Up @@ -136,16 +136,27 @@ public RetryDecision onUnavailable(
return (nbRetry == 0) ? RetryDecision.tryNextHost(null) : RetryDecision.rethrow();
}

/** {@inheritDoc} */
/**
* {@inheritDoc}
*
* <p>This implementation triggers a maximum of one retry on the next host in the query plan. The
* rationale is that the first coordinator might have been network-isolated or overloaded, and
* moving to the next host might resolve the issue. If the retry also fails, the exception is
* rethrown.
*
* <p>Read and write failures are never retried, as they generally indicate a data problem that is
* unlikely to be resolved by a retry.
*
* @return {@code RetryDecision.tryNextHost(cl)} if no retry attempt has yet been tried and the
* error is not a read/write failure, {@code RetryDecision.rethrow()} otherwise.
*/
@Override
public RetryDecision onRequestError(
Statement statement, ConsistencyLevel cl, DriverException e, int nbRetry) {
// do not retry these by default as they generally indicate a data problem or
// other issue that is unlikely to be resolved by a retry.
if (e instanceof WriteFailureException || e instanceof ReadFailureException) {
return RetryDecision.rethrow();
}
return RetryDecision.tryNextHost(cl);
return (nbRetry == 0) ? RetryDecision.tryNextHost(cl) : RetryDecision.rethrow();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,21 +208,24 @@ public RetryDecision onUnavailable(
/**
* {@inheritDoc}
*
* <p>For historical reasons, this implementation triggers a retry on the next host in the query
* plan with the same consistency level, regardless of the statement's idempotence. Note that this
* breaks the general rule stated in {@link RetryPolicy#onRequestError(Statement,
* ConsistencyLevel, DriverException, int)}: "a retry should only be attempted if the request is
* known to be idempotent".`
* <p>This implementation triggers a maximum of one retry on the next host in the query plan. The
* rationale is that the first coordinator might have been network-isolated or overloaded, and
* moving to the next host might resolve the issue. If the retry also fails, the exception is
* rethrown.
*
* <p>Read and write failures are never retried, as they generally indicate a data problem that is
* unlikely to be resolved by a retry.
*
* @return {@code RetryDecision.tryNextHost(cl)} if no retry attempt has yet been tried and the
* error is not a read/write failure, {@code RetryDecision.rethrow()} otherwise.
*/
@Override
public RetryDecision onRequestError(
Statement statement, ConsistencyLevel cl, DriverException e, int nbRetry) {
// do not retry these by default as they generally indicate a data problem or
// other issue that is unlikely to be resolved by a retry.
if (e instanceof WriteFailureException || e instanceof ReadFailureException) {
return RetryDecision.rethrow();
}
return RetryDecision.tryNextHost(cl);
return (nbRetry == 0) ? RetryDecision.tryNextHost(cl) : RetryDecision.rethrow();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,38 @@ public void should_rethrow_unavailable_in_no_host_available_exception() {
}

@Test(groups = "short")
public void should_try_next_host_on_client_timeouts() {
public void should_try_next_host_on_first_client_timeout() {
cluster.getConfiguration().getSocketOptions().setReadTimeoutMillis(1);
try {
scassandras
.node(1)
.primingClient()
.prime(
PrimingRequest.queryBuilder()
.withQuery("mock query")
.withThen(then().withFixedDelay(1000L).withRows(row("result", "result1")))
.build());
simulateNormalResponse(2);

query();

assertOnRequestErrorWasCalled(1, OperationTimedOutException.class);
assertThat(errors.getRetries().getCount()).isEqualTo(1);
assertThat(errors.getClientTimeouts().getCount()).isEqualTo(1);
assertThat(errors.getRetriesOnClientTimeout().getCount()).isEqualTo(1);
assertQueried(1, 1);
assertQueried(2, 1);
assertQueried(3, 0);
} finally {
cluster
.getConfiguration()
.getSocketOptions()
.setReadTimeoutMillis(SocketOptions.DEFAULT_READ_TIMEOUT_MILLIS);
}
}

@Test(groups = "short")
public void should_rethrow_on_second_client_timeout() {
cluster.getConfiguration().getSocketOptions().setReadTimeoutMillis(1);
try {
scassandras
Expand Down Expand Up @@ -208,32 +239,17 @@ public void should_try_next_host_on_client_timeouts() {
.build());
try {
query();
fail("expected a NoHostAvailableException");
} catch (NoHostAvailableException e) {
assertThat(e.getErrors().keySet())
.hasSize(3)
.containsOnly(host1.getEndPoint(), host2.getEndPoint(), host3.getEndPoint());
assertThat(e.getErrors().values()).hasOnlyElementsOfType(OperationTimedOutException.class);
assertThat(
((OperationTimedOutException) e.getErrors().get(host1.getEndPoint())).getMessage())
.contains(
String.format("[%s] Timed out waiting for server response", host1.getEndPoint()));
assertThat(
((OperationTimedOutException) e.getErrors().get(host2.getEndPoint())).getMessage())
.contains(
String.format("[%s] Timed out waiting for server response", host2.getEndPoint()));
assertThat(
((OperationTimedOutException) e.getErrors().get(host3.getEndPoint())).getMessage())
.contains(
String.format("[%s] Timed out waiting for server response", host3.getEndPoint()));
fail("expected an OperationTimedOutException");
} catch (OperationTimedOutException e) {
assertThat(e.getMessage()).contains("Timed out waiting for server response");
}
assertOnRequestErrorWasCalled(3, OperationTimedOutException.class);
assertThat(errors.getRetries().getCount()).isEqualTo(3);
assertThat(errors.getClientTimeouts().getCount()).isEqualTo(3);
assertThat(errors.getRetriesOnClientTimeout().getCount()).isEqualTo(3);
assertOnRequestErrorWasCalled(2, OperationTimedOutException.class);
assertThat(errors.getRetries().getCount()).isEqualTo(1);
assertThat(errors.getClientTimeouts().getCount()).isEqualTo(2);
assertThat(errors.getRetriesOnClientTimeout().getCount()).isEqualTo(1);
assertQueried(1, 1);
assertQueried(2, 1);
assertQueried(3, 1);
assertQueried(3, 0);
} finally {
cluster
.getConfiguration()
Expand All @@ -243,27 +259,41 @@ public void should_try_next_host_on_client_timeouts() {
}

@Test(groups = "short", dataProvider = "serverSideErrors")
public void should_try_next_host_on_server_side_error(
public void should_try_next_host_on_first_server_side_error(
Result error, Class<? extends DriverException> exception) {
simulateError(1, error);
simulateNormalResponse(2);

query();

assertOnRequestErrorWasCalled(1, exception);
assertThat(errors.getOthers().getCount()).isEqualTo(1);
assertThat(errors.getRetries().getCount()).isEqualTo(1);
assertThat(errors.getRetriesOnOtherErrors().getCount()).isEqualTo(1);
assertQueried(1, 1);
assertQueried(2, 1);
assertQueried(3, 0);
}

@Test(groups = "short", dataProvider = "serverSideErrors")
public void should_rethrow_on_second_server_side_error(
Result error, Class<? extends DriverException> exception) {
simulateError(1, error);
simulateError(2, error);
simulateError(3, error);
try {
query();
Fail.fail("expected a NoHostAvailableException");
} catch (NoHostAvailableException e) {
assertThat(e.getErrors().keySet())
.hasSize(3)
.containsOnly(host1.getEndPoint(), host2.getEndPoint(), host3.getEndPoint());
assertThat(e.getErrors().values()).hasOnlyElementsOfType(exception);
Fail.fail("expected a " + exception.getSimpleName());
} catch (DriverException e) {
assertThat(e).isInstanceOf(exception);
}
assertOnRequestErrorWasCalled(3, exception);
assertThat(errors.getOthers().getCount()).isEqualTo(3);
assertThat(errors.getRetries().getCount()).isEqualTo(3);
assertThat(errors.getRetriesOnOtherErrors().getCount()).isEqualTo(3);
assertOnRequestErrorWasCalled(2, exception);
assertThat(errors.getOthers().getCount()).isEqualTo(2);
assertThat(errors.getRetries().getCount()).isEqualTo(1);
assertThat(errors.getRetriesOnOtherErrors().getCount()).isEqualTo(1);
assertQueried(1, 1);
assertQueried(2, 1);
assertQueried(3, 1);
assertQueried(3, 0);
}

@Test(groups = "short")
Expand Down Expand Up @@ -307,27 +337,43 @@ public void should_rethrow_on_write_failure() {
}

@Test(groups = "short", dataProvider = "connectionErrors")
public void should_try_next_host_on_connection_error(ClosedConnectionConfig.CloseType closeType) {
public void should_try_next_host_on_first_connection_error(
ClosedConnectionConfig.CloseType closeType) {
simulateError(1, closed_connection, new ClosedConnectionConfig(closeType));
simulateNormalResponse(2);

query();

assertOnRequestErrorWasCalled(1, TransportException.class);
assertThat(errors.getRetries().getCount()).isEqualTo(1);
assertThat(errors.getConnectionErrors().getCount()).isEqualTo(1);
assertThat(errors.getIgnoresOnConnectionError().getCount()).isEqualTo(0);
assertThat(errors.getRetriesOnConnectionError().getCount()).isEqualTo(1);
assertQueried(1, 1);
assertQueried(2, 1);
assertQueried(3, 0);
}

@Test(groups = "short", dataProvider = "connectionErrors")
public void should_rethrow_on_second_connection_error(
ClosedConnectionConfig.CloseType closeType) {
simulateError(1, closed_connection, new ClosedConnectionConfig(closeType));
simulateError(2, closed_connection, new ClosedConnectionConfig(closeType));
simulateError(3, closed_connection, new ClosedConnectionConfig(closeType));
try {
query();
Fail.fail("expected a NoHostAvailableException");
} catch (NoHostAvailableException e) {
assertThat(e.getErrors().keySet())
.hasSize(3)
.containsOnly(host1.getEndPoint(), host2.getEndPoint(), host3.getEndPoint());
assertThat(e.getErrors().values()).hasOnlyElementsOfType(TransportException.class);
Fail.fail("expected a TransportException");
} catch (TransportException e) {
// expected — rethrown after one retry
}
assertOnRequestErrorWasCalled(3, TransportException.class);
assertThat(errors.getRetries().getCount()).isEqualTo(3);
assertThat(errors.getConnectionErrors().getCount()).isEqualTo(3);
assertOnRequestErrorWasCalled(2, TransportException.class);
assertThat(errors.getRetries().getCount()).isEqualTo(1);
assertThat(errors.getConnectionErrors().getCount()).isEqualTo(2);
assertThat(errors.getIgnoresOnConnectionError().getCount()).isEqualTo(0);
assertThat(errors.getRetriesOnConnectionError().getCount()).isEqualTo(3);
assertThat(errors.getRetriesOnConnectionError().getCount()).isEqualTo(1);
assertQueried(1, 1);
assertQueried(2, 1);
assertQueried(3, 1);
assertQueried(3, 0);
}

@Test(groups = "short")
Expand Down
Loading
Loading