From c2126a6f5d3960e467360bdc1a42ed629b1a3208 Mon Sep 17 00:00:00 2001 From: Valere Date: Fri, 9 Feb 2024 19:07:42 +0100 Subject: [PATCH 1/8] Add basic retry for outgoing requests --- .../OutgoingRequestProcessor.spec.ts | 237 +++++++++++++++++- src/rust-crypto/OutgoingRequestProcessor.ts | 89 ++++++- 2 files changed, 313 insertions(+), 13 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 3501ec8faed..081cc866145 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -27,8 +27,9 @@ import { UploadSigningKeysRequest, ToDeviceRequest, } from "@matrix-org/matrix-sdk-crypto-wasm"; +import fetchMock from "fetch-mock-jest"; -import { TypedEventEmitter } from "../../../src/models/typed-event-emitter"; +import { TypedEventEmitter } from "../../../src"; import { HttpApiEvent, HttpApiEventHandlerMap, IHttpOpts, MatrixHttpApi, UIAuthCallback } from "../../../src"; import { OutgoingRequestProcessor } from "../../../src/rust-crypto/OutgoingRequestProcessor"; import { defer } from "../../../src/utils"; @@ -274,4 +275,238 @@ describe("OutgoingRequestProcessor", () => { // ... and `makeOutgoingRequest` resolves satisfactorily await result; }); + + describe("Should retry requests", () => { + beforeEach(() => { + jest.useFakeTimers(); + + // here we use another httpApi instance in order to use fetchMock + const dummyEventEmitter = new TypedEventEmitter(); + const httpApi = new MatrixHttpApi(dummyEventEmitter, { + baseUrl: "https://example.com", + prefix: "/_matrix", + onlyData: true, + }); + + processor = new OutgoingRequestProcessor(olmMachine, httpApi); + }); + + afterEach(() => { + jest.useRealTimers(); + fetchMock.reset(); + }); + + describe("Should retry on retryable errors", () => { + const retryableErrors: Array<[number, { status: number; body: { error: string } }]> = [ + [429, { status: 429, body: { error: "Too Many Requests" } }], + [500, { status: 500, body: { error: "Internal Server Error" } }], + [502, { status: 502, body: { error: "Bad Gateway" } }], + [503, { status: 503, body: { error: "Service Unavailable" } }], + [504, { status: 504, body: { error: "Gateway Timeout" } }], + [525, { status: 525, body: { error: "SSL Handshake Failed" } }], + ]; + describe.each(retryableErrors)(`When status code is %s`, (_, error) => { + test.each(tests)(`for request of type %ss`, async (_, RequestClass, expectedMethod, expectedPath) => { + // first, mock up a request as we might expect to receive it from the Rust layer ... 
+ const testBody = '{ "foo": "bar" }'; + const outgoingRequest = new RequestClass("1234", testBody); + + // @ts-ignore to avoid having to do if else to switch the method (.put/.post) + fetchMock[expectedMethod.toLowerCase()](expectedPath, error); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + // Run all timers and wait for the request promise to resolve/reject + await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]); + + await expect(requestPromise).rejects.toThrow(); + + // Should have ultimately made 4 requests (1 initial + 3 retries) + const calls = fetchMock.calls(expectedPath); + expect(calls).toHaveLength(4); + }); + }); + }); + + it("should retry on Failed to fetch connection errors", async () => { + let callCount = 0; + fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + throw new Error("Failed to fetch"); + } + }); + + const outgoingRequest = new KeysUploadRequest("1234", "{}"); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(2); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + + it("should retry to send to-device", async () => { + let callCount = 0; + const testBody = '{ "messages": { "user": {"device": "bar" }}}'; + const outgoingRequest = new ToDeviceRequest("1234", "custom.type", "12345", testBody); + + fetchMock.put("express:/_matrix/client/v3/sendToDevice/:type/:txnId", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + throw new Error("Failed to fetch"); + } + }); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("express:/_matrix/client/v3/sendToDevice/:type/:txnId"); + expect(calls).toHaveLength(2); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + + it("should retry to call with UIA", async () => { + let callCount = 0; + const testBody = '{ "foo": "bar" }'; + const outgoingRequest = new UploadSigningKeysRequest(testBody); + + fetchMock.post("path:/_matrix/client/v3/keys/device_signing/upload", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + throw new Error("Failed to fetch"); + } + }); + const authCallback: UIAuthCallback = async (makeRequest) => { + return await makeRequest({ type: "test" }); + }; + const requestPromise = processor.makeOutgoingRequest(outgoingRequest, authCallback); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/device_signing/upload"); + expect(calls).toHaveLength(2); + // Will not mark as sent as it's a UIA request + }); + + it("should retry on respect server cool down on LIMIT_EXCEEDED", async () => { + const retryAfterMs = 5000; + let callCount = 0; + + fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + return { + status: 429, + body: { + errcode: "M_LIMIT_EXCEEDED", + error: "Too many requests", + retry_after_ms: retryAfterMs, + }, + }; + } + }); + + const outgoingRequest = new KeysUploadRequest("1234", 
"{}"); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + // advanced by less than the retryAfterMs + await jest.advanceTimersByTimeAsync(retryAfterMs - 1000); + + // should not have made a second request yet + { + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(1); + } + + // advanced by the remaining time + await jest.advanceTimersByTimeAsync(retryAfterMs + 1000); + + await requestPromise; + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(2); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + + describe("Should not retry all sort of errors", () => { + test.each(tests)("for %ss", async (_, RequestClass, expectedMethod, expectedPath) => { + const testBody = '{ "foo": "bar" }'; + const outgoingRequest = new RequestClass("1234", testBody); + + // @ts-ignore to avoid having to do if else to switch the method (.put/.post) + fetchMock[expectedMethod.toLowerCase()](expectedPath, { + status: 401, + body: { + errcode: "M_UNKNOWN_TOKEN", + }, + }); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + // Run all timers and wait for the request promise to resolve/reject + await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]); + + await expect(requestPromise).rejects.toThrow(); + + // Should have ultimately made 4 requests (1 initial + 3 retries) + const calls = fetchMock.calls(expectedPath); + expect(calls).toHaveLength(1); + }); + }); + + describe("Should retry until it works", () => { + it.each([1, 2, 3, 4])("should succeed if the call number %s is ok", async (successfulCall) => { + let callCount = 0; + fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { + callCount++; + if (callCount == successfulCall) { + return { + status: 200, + body: "{}", + }; + } else { + return { + status: 500, + body: { error: "Internal server error" }, + }; + } + }); + + const outgoingRequest = new KeysUploadRequest("1234", "{}"); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(successfulCall); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + }); + }); }); diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 18b9d6b03fa..a82d329653a 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -15,11 +15,11 @@ limitations under the License. */ import { - OlmMachine, KeysBackupRequest, KeysClaimRequest, KeysQueryRequest, KeysUploadRequest, + OlmMachine, RoomMessageRequest, SignatureUploadRequest, ToDeviceRequest, @@ -27,8 +27,8 @@ import { } from "@matrix-org/matrix-sdk-crypto-wasm"; import { logger } from "../logger"; -import { IHttpOpts, MatrixHttpApi, Method } from "../http-api"; -import { logDuration, QueryDict } from "../utils"; +import { ConnectionError, IHttpOpts, MatrixError, MatrixHttpApi, Method } from "../http-api"; +import { logDuration, QueryDict, sleep } from "../utils"; import { IAuthDict, UIAuthCallback } from "../interactive-auth"; import { UIAResponse } from "../@types/uia"; import { ToDeviceMessageId } from "../@types/event"; @@ -43,6 +43,11 @@ export interface OutgoingRequest { readonly type: number; } +// A list of HTTP status codes that we should retry on. 
+const retryableHttpStatuses = [429, 500, 502, 503, 504, 525]; +// The default delay to wait before retrying a request. +const DEFAULT_RETRY_DELAY_MS = 1000; + /** * OutgoingRequestManager: turns `OutgoingRequest`s from the rust sdk into HTTP requests * @@ -71,15 +76,15 @@ export class OutgoingRequestProcessor { * for the complete list of request types */ if (msg instanceof KeysUploadRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); } else if (msg instanceof KeysQueryRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); } else if (msg instanceof KeysClaimRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); } else if (msg instanceof SignatureUploadRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); } else if (msg instanceof KeysBackupRequest) { - resp = await this.rawJsonRequest( + resp = await this.fetchWithRetry( Method.Put, "/_matrix/client/v3/room_keys/keys", { version: msg.version }, @@ -91,7 +96,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/rooms/${encodeURIComponent(msg.room_id)}/send/` + `${encodeURIComponent(msg.event_type)}/${encodeURIComponent(msg.txn_id)}`; - resp = await this.rawJsonRequest(Method.Put, path, {}, msg.body); + resp = await this.fetchWithRetry(Method.Put, path, {}, msg.body); } else if (msg instanceof UploadSigningKeysRequest) { await this.makeRequestWithUIA( Method.Post, @@ -154,7 +159,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/sendToDevice/${encodeURIComponent(request.event_type)}/` + encodeURIComponent(request.txn_id); - return await this.rawJsonRequest(Method.Put, path, {}, request.body); + return await this.fetchWithRetry(Method.Put, path, {}, request.body); } private async makeRequestWithUIA( @@ -165,7 +170,7 @@ export class OutgoingRequestProcessor { uiaCallback: UIAuthCallback | undefined, ): Promise { if (!uiaCallback) { - return await this.rawJsonRequest(method, path, queryParams, body); + return await this.fetchWithRetry(method, path, queryParams, body); } const parsedBody = JSON.parse(body); @@ -176,7 +181,7 @@ export class OutgoingRequestProcessor { if (auth !== null) { newBody.auth = auth; } - const resp = await this.rawJsonRequest(method, path, queryParams, JSON.stringify(newBody)); + const resp = await this.fetchWithRetry(method, path, queryParams, JSON.stringify(newBody)); return JSON.parse(resp) as T; }; @@ -201,4 +206,64 @@ export class OutgoingRequestProcessor { return await this.http.authedRequest(method, path, queryParams, body, opts); } + + private async fetchWithRetry( + method: Method, + path: string, + queryParams: QueryDict, + body: string, + maxRetryCount: number = 3, + ): Promise { + let currentRetryCount = 0; + + // eslint-disable-next-line no-constant-condition + while (true) { + try { + return await this.rawJsonRequest(method, path, queryParams, body); + } catch (e) { + if (currentRetryCount >= maxRetryCount) { + // Max number of retries reached, 
rethrow the error + throw e; + } + + currentRetryCount++; + + const maybeRetryAfter = this.shouldWaitBeforeRetryingMillis(e); + if (maybeRetryAfter) { + // wait for the specified time and then retry the request + await sleep(maybeRetryAfter); + // continue the loop and retry the request + } else { + throw e; + } + } + } + } + + /** + * Determine if a given error should be retried, and if so, how long to wait before retrying. + * If the error should not be retried, returns undefined. + * + * @param e the error returned by the http stack + * @private + */ + private shouldWaitBeforeRetryingMillis(e: any): number | undefined { + if (e instanceof MatrixError) { + // On rate limited errors, we should retry after the rate limit has expired. + if (e.errcode === "M_LIMIT_EXCEEDED") { + return e.data.retry_after_ms ?? DEFAULT_RETRY_DELAY_MS; + } + } + + if (e.httpStatus && retryableHttpStatuses.includes(e.httpStatus)) { + return DEFAULT_RETRY_DELAY_MS; + } + + if (e instanceof ConnectionError) { + return DEFAULT_RETRY_DELAY_MS; + } + + // don't retry + return; + } } From c112dcfa45c157098ccc4a8e0587ebfd0ce76671 Mon Sep 17 00:00:00 2001 From: Valere Date: Mon, 12 Feb 2024 09:07:18 +0100 Subject: [PATCH 2/8] Update doc --- src/rust-crypto/OutgoingRequestProcessor.ts | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index a82d329653a..89558a923c1 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -44,7 +44,23 @@ export interface OutgoingRequest { } // A list of HTTP status codes that we should retry on. -const retryableHttpStatuses = [429, 500, 502, 503, 504, 525]; +// These status codes represent server errors or rate limiting issues. +// Retrying the request after a delay might succeed when the server issue +// is resolved or when the rate limit is reset. +const retryableHttpStatuses = [ + // Too Many Requests + 429, + // Internal Server Error + 500, + // Bad Gateway + 502, + // Service Unavailable (overloaded or down for maintenance) + 503, + // Gateway Timeout + 504, + // SSL Handshake Failed + 525, +]; // The default delay to wait before retrying a request. const DEFAULT_RETRY_DELAY_MS = 1000; @@ -244,8 +260,7 @@ export class OutgoingRequestProcessor { * Determine if a given error should be retried, and if so, how long to wait before retrying. * If the error should not be retried, returns undefined. 
* - * @param e the error returned by the http stack - * @private + * @param e - the error returned by the http stack */ private shouldWaitBeforeRetryingMillis(e: any): number | undefined { if (e instanceof MatrixError) { From 735f2ee577202a381acd02679778c5ebf2e41219 Mon Sep 17 00:00:00 2001 From: Valere Date: Mon, 12 Feb 2024 13:45:04 +0100 Subject: [PATCH 3/8] Remove 504 from retryable --- spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts | 1 - src/rust-crypto/OutgoingRequestProcessor.ts | 2 -- 2 files changed, 3 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 081cc866145..8ed794382de 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -302,7 +302,6 @@ describe("OutgoingRequestProcessor", () => { [500, { status: 500, body: { error: "Internal Server Error" } }], [502, { status: 502, body: { error: "Bad Gateway" } }], [503, { status: 503, body: { error: "Service Unavailable" } }], - [504, { status: 504, body: { error: "Gateway Timeout" } }], [525, { status: 525, body: { error: "SSL Handshake Failed" } }], ]; describe.each(retryableErrors)(`When status code is %s`, (_, error) => { diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 89558a923c1..03b9320ddb8 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -56,8 +56,6 @@ const retryableHttpStatuses = [ 502, // Service Unavailable (overloaded or down for maintenance) 503, - // Gateway Timeout - 504, // SSL Handshake Failed 525, ]; From afa6a0901c01cc53288dff9fed924ed31d45ff68 Mon Sep 17 00:00:00 2001 From: Valere Date: Tue, 13 Feb 2024 13:45:44 +0100 Subject: [PATCH 4/8] Retry all 5xx and clarify client timeouts --- .../OutgoingRequestProcessor.spec.ts | 52 +++++++++++++++++-- src/rust-crypto/OutgoingRequestProcessor.ts | 39 ++++++++------ 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 8ed794382de..409387d3009 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -302,6 +302,14 @@ describe("OutgoingRequestProcessor", () => { [500, { status: 500, body: { error: "Internal Server Error" } }], [502, { status: 502, body: { error: "Bad Gateway" } }], [503, { status: 503, body: { error: "Service Unavailable" } }], + [504, { status: 504, body: { error: "Gateway timeout" } }], + [504, { status: 504, body: { error: "Gateway Timeout" } }], + [505, { status: 505, body: { error: "HTTP Version Not Supported" } }], + [506, { status: 506, body: { error: "Variant Also Negotiates" } }], + [507, { status: 507, body: { error: "Insufficient Storage" } }], + [508, { status: 508, body: { error: "Loop Detected" } }], + [510, { status: 510, body: { error: "Not Extended" } }], + [511, { status: 511, body: { error: "Network Authentication Required" } }], [525, { status: 525, body: { error: "SSL Handshake Failed" } }], ]; describe.each(retryableErrors)(`When status code is %s`, (_, error) => { @@ -323,6 +331,9 @@ describe("OutgoingRequestProcessor", () => { // Should have ultimately made 4 requests (1 initial + 3 retries) const calls = fetchMock.calls(expectedPath); expect(calls).toHaveLength(4); + + // The promise should have been rejected + await 
expect(requestPromise).rejects.toThrow();
});
});
});
@@ -452,17 +463,47 @@ describe("OutgoingRequestProcessor", () => {
expect(olmMachine.markRequestAsSent).toHaveBeenCalled();
});
- describe("Should not retry all sorts of errors", () => {
+ const nonRetryableErrors: Array<[number, { status: number; body: { errcode: string } }]> = [
+ [400, { status: 400, body: { errcode: "Bad Request" } }],
+ [401, { status: 401, body: { errcode: "M_UNKNOWN_TOKEN" } }],
+ [403, { status: 403, body: { errcode: "M_FORBIDDEN" } }],
+ ];
+
+ describe.each(nonRetryableErrors)("Should not retry all sorts of errors", (_, error) => {
test.each(tests)("for %ss", async (_, RequestClass, expectedMethod, expectedPath) => {
const testBody = '{ "foo": "bar" }';
const outgoingRequest = new RequestClass("1234", testBody);

// @ts-ignore to avoid having to do if else to switch the method (.put/.post)
fetchMock[expectedMethod.toLowerCase()](expectedPath, {
- status: 401,
- body: {
- errcode: "M_UNKNOWN_TOKEN",
- },
+ status: error.status,
+ body: error.body,
});

const requestPromise = processor.makeOutgoingRequest(outgoingRequest);

// Run all timers and wait for the request promise to resolve/reject
await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]);

await expect(requestPromise).rejects.toThrow();

// Should have ultimately made 4 requests (1 initial + 3 retries)
const calls = fetchMock.calls(expectedPath);
expect(calls).toHaveLength(1);
+
+ await expect(requestPromise).rejects.toThrow();
});
});
+
+ describe("Should not retry client timeouts", () => {
+ test.each(tests)("for %ss", async (_, RequestClass, expectedMethod, expectedPath) => {
+ const testBody = '{ "foo": "bar" }';
+ const outgoingRequest = new RequestClass("1234", testBody);
+
+ // @ts-ignore to avoid having to do if else to switch the method (.put/.post)
+ fetchMock[expectedMethod.toLowerCase()](expectedPath, () => {
+ // This is what a client timeout error will throw
+ throw new DOMException("The user aborted a request.", "AbortError");
+ });

const requestPromise = processor.makeOutgoingRequest(outgoingRequest);

// Run all timers and wait for the request promise to resolve/reject
await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]);

await expect(requestPromise).rejects.toThrow();

// Should have ultimately made 4 requests (1 initial + 3 retries)
const calls = fetchMock.calls(expectedPath);
expect(calls).toHaveLength(1);
+ await expect(requestPromise).rejects.toThrow();
});
});
diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts
index 03b9320ddb8..c7520728f1b 100644
--- a/src/rust-crypto/OutgoingRequestProcessor.ts
+++ b/src/rust-crypto/OutgoingRequestProcessor.ts
@@ -43,22 +43,6 @@ export interface OutgoingRequest {
readonly type: number;
}

-// A list of HTTP status codes that we should retry on.
-// These status codes represent server errors or rate limiting issues.
-// Retrying the request after a delay might succeed when the server issue
-// is resolved or when the rate limit is reset.
-const retryableHttpStatuses = [
- // Too Many Requests
- 429,
- // Internal Server Error
- 500,
- // Bad Gateway
- 502,
- // Service Unavailable (overloaded or down for maintenance)
- 503,
- // SSL Handshake Failed
- 525,
-];
// The default delay to wait before retrying a request.
const DEFAULT_RETRY_DELAY_MS = 1000;

@@ -254,6 +238,24 @@ export class OutgoingRequestProcessor {
}
}

+ /**
+ * Returns true if the request should be retried, false otherwise.
+ * + * Retrying the request after a delay might succeed when the server issue + * is resolved or when the rate limit is reset. + * @param httpStatus - the HTTP status code of the response + */ + private canRetry(httpStatus: number): boolean { + // Too Many Requests + if (httpStatus === 429) return true; + + // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) + // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. + // All keys/signatures uploads are, message and to device are because of txn_id, keys claim in worst case will claim + // several keys but won't cause harm. + return httpStatus >= 500 && httpStatus < 600; + } + /** * Determine if a given error should be retried, and if so, how long to wait before retrying. * If the error should not be retried, returns undefined. @@ -268,10 +270,13 @@ export class OutgoingRequestProcessor { } } - if (e.httpStatus && retryableHttpStatuses.includes(e.httpStatus)) { + if (e.httpStatus && this.canRetry(e.httpStatus)) { return DEFAULT_RETRY_DELAY_MS; } + // Notice that client timeout errors are not ConnectionErrors, they would be `AbortError`. + // Client timeout (AbortError) errors are not retried, the default timout is already + // very high (using browser defaults e.g. 300 or 90 seconds). if (e instanceof ConnectionError) { return DEFAULT_RETRY_DELAY_MS; } From 5a707e1073f04b443e843aaa93e71030d7c7712b Mon Sep 17 00:00:00 2001 From: Valere Date: Fri, 23 Feb 2024 11:17:54 +0100 Subject: [PATCH 5/8] code review cleaning --- .../OutgoingRequestProcessor.spec.ts | 8 +- src/rust-crypto/OutgoingRequestProcessor.ts | 107 +++++++++--------- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 409387d3009..8671dcd0545 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -303,7 +303,6 @@ describe("OutgoingRequestProcessor", () => { [502, { status: 502, body: { error: "Bad Gateway" } }], [503, { status: 503, body: { error: "Service Unavailable" } }], [504, { status: 504, body: { error: "Gateway timeout" } }], - [504, { status: 504, body: { error: "Gateway Timeout" } }], [505, { status: 505, body: { error: "HTTP Version Not Supported" } }], [506, { status: 506, body: { error: "Variant Also Negotiates" } }], [507, { status: 507, body: { error: "Insufficient Storage" } }], @@ -318,8 +317,7 @@ describe("OutgoingRequestProcessor", () => { const testBody = '{ "foo": "bar" }'; const outgoingRequest = new RequestClass("1234", testBody); - // @ts-ignore to avoid having to do if else to switch the method (.put/.post) - fetchMock[expectedMethod.toLowerCase()](expectedPath, error); + fetchMock.mock(expectedPath, error, { method: expectedMethod }); const requestPromise = processor.makeOutgoingRequest(outgoingRequest); @@ -487,7 +485,7 @@ describe("OutgoingRequestProcessor", () => { await expect(requestPromise).rejects.toThrow(); - // Should have ultimately made 4 requests (1 initial + 3 retries) + // Should have only tried once const calls = fetchMock.calls(expectedPath); expect(calls).toHaveLength(1); @@ -513,7 +511,7 @@ describe("OutgoingRequestProcessor", () => { await expect(requestPromise).rejects.toThrow(); - // Should have ultimately made 4 requests (1 initial + 3 retries) + // Should have only tried once const calls = fetchMock.calls(expectedPath); expect(calls).toHaveLength(1); await 
expect(requestPromise).rejects.toThrow(); diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index c7520728f1b..73f611ded63 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -46,6 +46,9 @@ export interface OutgoingRequest { // The default delay to wait before retrying a request. const DEFAULT_RETRY_DELAY_MS = 1000; +// The http request will be retried at most 4 times if the error is retryable. +const MAX_REQUEST_RETRY_COUNT = 3; + /** * OutgoingRequestManager: turns `OutgoingRequest`s from the rust sdk into HTTP requests * @@ -74,15 +77,15 @@ export class OutgoingRequestProcessor { * for the complete list of request types */ if (msg instanceof KeysUploadRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); } else if (msg instanceof KeysQueryRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); } else if (msg instanceof KeysClaimRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); } else if (msg instanceof SignatureUploadRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); } else if (msg instanceof KeysBackupRequest) { - resp = await this.fetchWithRetry( + resp = await this.requestWithRetry( Method.Put, "/_matrix/client/v3/room_keys/keys", { version: msg.version }, @@ -94,7 +97,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/rooms/${encodeURIComponent(msg.room_id)}/send/` + `${encodeURIComponent(msg.event_type)}/${encodeURIComponent(msg.txn_id)}`; - resp = await this.fetchWithRetry(Method.Put, path, {}, msg.body); + resp = await this.requestWithRetry(Method.Put, path, {}, msg.body); } else if (msg instanceof UploadSigningKeysRequest) { await this.makeRequestWithUIA( Method.Post, @@ -157,7 +160,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/sendToDevice/${encodeURIComponent(request.event_type)}/` + encodeURIComponent(request.txn_id); - return await this.fetchWithRetry(Method.Put, path, {}, request.body); + return await this.requestWithRetry(Method.Put, path, {}, request.body); } private async makeRequestWithUIA( @@ -168,7 +171,7 @@ export class OutgoingRequestProcessor { uiaCallback: UIAuthCallback | undefined, ): Promise { if (!uiaCallback) { - return await this.fetchWithRetry(method, path, queryParams, body); + return await this.requestWithRetry(method, path, queryParams, body); } const parsedBody = JSON.parse(body); @@ -179,7 +182,7 @@ export class OutgoingRequestProcessor { if (auth !== null) { newBody.auth = auth; } - const resp = await this.fetchWithRetry(method, path, queryParams, JSON.stringify(newBody)); + const resp = await this.requestWithRetry(method, path, queryParams, JSON.stringify(newBody)); return JSON.parse(resp) as T; }; @@ -187,30 +190,11 @@ export class OutgoingRequestProcessor { return JSON.stringify(resp); } - private async rawJsonRequest(method: Method, path: string, 
queryParams: QueryDict, body: string): Promise { - const opts = { - // inhibit the JSON stringification and parsing within HttpApi. - json: false, - - // nevertheless, we are sending, and accept, JSON. - headers: { - "Content-Type": "application/json", - "Accept": "application/json", - }, - - // we use the full prefix - prefix: "", - }; - - return await this.http.authedRequest(method, path, queryParams, body, opts); - } - - private async fetchWithRetry( + private async requestWithRetry( method: Method, path: string, queryParams: QueryDict, body: string, - maxRetryCount: number = 3, ): Promise { let currentRetryCount = 0; @@ -219,41 +203,41 @@ export class OutgoingRequestProcessor { try { return await this.rawJsonRequest(method, path, queryParams, body); } catch (e) { - if (currentRetryCount >= maxRetryCount) { + if (currentRetryCount >= MAX_REQUEST_RETRY_COUNT) { // Max number of retries reached, rethrow the error throw e; } - currentRetryCount++; - const maybeRetryAfter = this.shouldWaitBeforeRetryingMillis(e); - if (maybeRetryAfter) { - // wait for the specified time and then retry the request - await sleep(maybeRetryAfter); - // continue the loop and retry the request - } else { + if (maybeRetryAfter === undefined) { + // this error is not retryable throw e; } + + currentRetryCount++; + // wait for the specified time and then retry the request + await sleep(maybeRetryAfter); + // continue the loop and retry the request } } } - /** - * Returns true if the request should be retried, false otherwise. - * - * Retrying the request after a delay might succeed when the server issue - * is resolved or when the rate limit is reset. - * @param httpStatus - the HTTP status code of the response - */ - private canRetry(httpStatus: number): boolean { - // Too Many Requests - if (httpStatus === 429) return true; + private async rawJsonRequest(method: Method, path: string, queryParams: QueryDict, body: string): Promise { + const opts = { + // inhibit the JSON stringification and parsing within HttpApi. + json: false, - // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) - // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. - // All keys/signatures uploads are, message and to device are because of txn_id, keys claim in worst case will claim - // several keys but won't cause harm. - return httpStatus >= 500 && httpStatus < 600; + // nevertheless, we are sending, and accept, JSON. + headers: { + "Content-Type": "application/json", + "Accept": "application/json", + }, + + // we use the full prefix + prefix: "", + }; + + return await this.http.authedRequest(method, path, queryParams, body, opts); } /** @@ -284,4 +268,23 @@ export class OutgoingRequestProcessor { // don't retry return; } + + /** + * Returns true if the request should be retried, false otherwise. + * + * Retrying the request after a delay might succeed when the server issue + * is resolved or when the rate limit is reset. + * @param httpStatus - the HTTP status code of the response + */ + private canRetry(httpStatus: number): boolean { + // Too Many Requests + if (httpStatus === 429) return true; + + // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) + // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. + // * All key/signature uploads are idempotent. + // * Room message and to-device send requests are idempotent because of txn_id. 
+ // * Keys claim in worst case will claim several keys but won't cause harm. + return httpStatus >= 500 && httpStatus < 600; + } } From 01c4d0e3bba26c4a4936d06dced10f8af89bbd44 Mon Sep 17 00:00:00 2001 From: Valere Date: Fri, 23 Feb 2024 11:47:44 +0100 Subject: [PATCH 6/8] do not retry rust request if M_TOO_LARGE --- .../OutgoingRequestProcessor.spec.ts | 25 +++++++++++++++++++ src/rust-crypto/OutgoingRequestProcessor.ts | 6 +++++ 2 files changed, 31 insertions(+) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 8671dcd0545..557be8070e1 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -336,6 +336,31 @@ describe("OutgoingRequestProcessor", () => { }); }); + it("should not retry if M_TOO_LARGE", async () => { + const testBody = '{ "messages": { "user": {"device": "bar" }}}'; + const outgoingRequest = new ToDeviceRequest("1234", "custom.type", "12345", testBody); + + fetchMock.put("express:/_matrix/client/v3/sendToDevice/:type/:txnId", { + status: 502, + body: { + errcode: "M_TOO_LARGE", + error: "Request too large", + }, + }); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise.catch(() => {}), jest.runAllTimersAsync()]); + + await expect(requestPromise).rejects.toThrow(); + + const calls = fetchMock.calls("express:/_matrix/client/v3/sendToDevice/:type/:txnId"); + expect(calls).toHaveLength(1); + + // The promise should have been rejected + await expect(requestPromise).rejects.toThrow(); + }); + it("should retry on Failed to fetch connection errors", async () => { let callCount = 0; fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 73f611ded63..c93b53de9f0 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -252,6 +252,12 @@ export class OutgoingRequestProcessor { if (e.errcode === "M_LIMIT_EXCEEDED") { return e.data.retry_after_ms ?? DEFAULT_RETRY_DELAY_MS; } + + if (e.errcode === "M_TOO_LARGE") { + // The request was too large, we should not retry. + // Could be a 502 or 413 status code as per documentation. 
+ return undefined;
+ }
}

From 4dc469b2ee8c14cc01fd3806309e669faf1cb2a6 Mon Sep 17 00:00:00 2001
From: Valere
Date: Fri, 23 Feb 2024 13:48:20 +0100
Subject: [PATCH 7/8] refactor: use common retry algorithm between scheduler and
 rust requests

---
 .../OutgoingRequestProcessor.spec.ts | 4 +-
 src/request-retry-utils.ts | 63 +++++++++++++++
 src/rust-crypto/OutgoingRequestProcessor.ts | 79 ++-----------------
 src/scheduler.ts | 32 +-------
 4 files changed, 76 insertions(+), 102 deletions(-)
 create mode 100644 src/request-retry-utils.ts

diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts
index 557be8070e1..e63243d2913 100644
--- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts
+++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts
@@ -326,9 +326,9 @@ describe("OutgoingRequestProcessor", () => {

await expect(requestPromise).rejects.toThrow();

- // Should have ultimately made 4 requests (1 initial + 3 retries)
+ // Should have ultimately made 5 requests (1 initial + 4 retries)
const calls = fetchMock.calls(expectedPath);
- expect(calls).toHaveLength(4);
+ expect(calls).toHaveLength(5);

// The promise should have been rejected
await expect(requestPromise).rejects.toThrow();
diff --git a/src/request-retry-utils.ts b/src/request-retry-utils.ts
new file mode 100644
index 00000000000..1ec2940dede
--- /dev/null
+++ b/src/request-retry-utils.ts
@@ -0,0 +1,63 @@
+/*
+Copyright 2024 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import { ConnectionError } from "./http-api";
+
+/**
+ * Retries events up to 4 times (so 5 including initial call) using exponential backoff.
+ * This produces wait times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the
+ * failure was due to a rate limited request, the time specified in the error is returned.
+ *
+ * Returns -1 if the error is not retryable, or if we reach the maximum number of attempts.
+ *
+ * @param err - The error thrown by the http call
+ * @param attempts - The current number of attempts
+ * @param retryConnectionError - Whether to retry on {@link ConnectionError} (CORS, connection is down, etc.)
+ */
+export function calculateRetryBackoff(err: any, attempts: number, retryConnectionError: boolean = false): number {
+ if (attempts > 4) {
+ return -1; // give up
+ }
+
+ if (err instanceof ConnectionError && !retryConnectionError) {
+ return -1;
+ }
+
+ if (err.httpStatus && (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401)) {
+ // client error; no amount of retrying with save you now.
+ return -1; + } + + if (err.name === "AbortError") { + // this is a client timeout, that is already very high 60s/80s + // we don't want to retry, as it could do it for very long + return -1; + } + + // if event that we are trying to send is too large in any way then retrying won't help + if (err.name === "M_TOO_LARGE") { + return -1; + } + + if (err.name === "M_LIMIT_EXCEEDED") { + const waitTime = err.data.retry_after_ms; + if (waitTime > 0) { + return waitTime; + } + } + + return 1000 * Math.pow(2, attempts); +} diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index c93b53de9f0..26c6124006f 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -27,11 +27,12 @@ import { } from "@matrix-org/matrix-sdk-crypto-wasm"; import { logger } from "../logger"; -import { ConnectionError, IHttpOpts, MatrixError, MatrixHttpApi, Method } from "../http-api"; +import { IHttpOpts, MatrixHttpApi, Method } from "../http-api"; import { logDuration, QueryDict, sleep } from "../utils"; import { IAuthDict, UIAuthCallback } from "../interactive-auth"; import { UIAResponse } from "../@types/uia"; import { ToDeviceMessageId } from "../@types/event"; +import { calculateRetryBackoff } from "../request-retry-utils"; /** * Common interface for all the request types returned by `OlmMachine.outgoingRequests`. @@ -43,12 +44,6 @@ export interface OutgoingRequest { readonly type: number; } -// The default delay to wait before retrying a request. -const DEFAULT_RETRY_DELAY_MS = 1000; - -// The http request will be retried at most 4 times if the error is retryable. -const MAX_REQUEST_RETRY_COUNT = 3; - /** * OutgoingRequestManager: turns `OutgoingRequest`s from the rust sdk into HTTP requests * @@ -203,20 +198,14 @@ export class OutgoingRequestProcessor { try { return await this.rawJsonRequest(method, path, queryParams, body); } catch (e) { - if (currentRetryCount >= MAX_REQUEST_RETRY_COUNT) { - // Max number of retries reached, rethrow the error - throw e; - } - - const maybeRetryAfter = this.shouldWaitBeforeRetryingMillis(e); - if (maybeRetryAfter === undefined) { - // this error is not retryable + currentRetryCount++; + const backoff = calculateRetryBackoff(e, currentRetryCount, true); + if (backoff < 0) { + // Max number of retries reached, or error is not retryable. rethrow the error throw e; } - - currentRetryCount++; // wait for the specified time and then retry the request - await sleep(maybeRetryAfter); + await sleep(backoff); // continue the loop and retry the request } } @@ -239,58 +228,4 @@ export class OutgoingRequestProcessor { return await this.http.authedRequest(method, path, queryParams, body, opts); } - - /** - * Determine if a given error should be retried, and if so, how long to wait before retrying. - * If the error should not be retried, returns undefined. - * - * @param e - the error returned by the http stack - */ - private shouldWaitBeforeRetryingMillis(e: any): number | undefined { - if (e instanceof MatrixError) { - // On rate limited errors, we should retry after the rate limit has expired. - if (e.errcode === "M_LIMIT_EXCEEDED") { - return e.data.retry_after_ms ?? DEFAULT_RETRY_DELAY_MS; - } - - if (e.errcode === "M_TOO_LARGE") { - // The request was too large, we should not retry. - // Could be a 502 or 413 status code as per documentation. 
- return undefined; - } - } - - if (e.httpStatus && this.canRetry(e.httpStatus)) { - return DEFAULT_RETRY_DELAY_MS; - } - - // Notice that client timeout errors are not ConnectionErrors, they would be `AbortError`. - // Client timeout (AbortError) errors are not retried, the default timout is already - // very high (using browser defaults e.g. 300 or 90 seconds). - if (e instanceof ConnectionError) { - return DEFAULT_RETRY_DELAY_MS; - } - - // don't retry - return; - } - - /** - * Returns true if the request should be retried, false otherwise. - * - * Retrying the request after a delay might succeed when the server issue - * is resolved or when the rate limit is reset. - * @param httpStatus - the HTTP status code of the response - */ - private canRetry(httpStatus: number): boolean { - // Too Many Requests - if (httpStatus === 429) return true; - - // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) - // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. - // * All key/signature uploads are idempotent. - // * Room message and to-device send requests are idempotent because of txn_id. - // * Keys claim in worst case will claim several keys but won't cause harm. - return httpStatus >= 500 && httpStatus < 600; - } } diff --git a/src/scheduler.ts b/src/scheduler.ts index 41612f1c902..557abe84a45 100644 --- a/src/scheduler.ts +++ b/src/scheduler.ts @@ -22,8 +22,9 @@ import { logger } from "./logger"; import { MatrixEvent } from "./models/event"; import { EventType } from "./@types/event"; import { defer, IDeferred, removeElement } from "./utils"; -import { ConnectionError, MatrixError } from "./http-api"; +import { MatrixError } from "./http-api"; import { ISendEventResponse } from "./@types/requests"; +import { calculateRetryBackoff } from "./request-retry-utils"; const DEBUG = false; // set true to enable console logging. @@ -43,38 +44,13 @@ type ProcessFunction = (event: MatrixEvent) => Promise; // eslint-disable-next-line camelcase export class MatrixScheduler { /** - * Retries events up to 4 times using exponential backoff. This produces wait - * times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the - * failure was due to a rate limited request, the time specified in the error is - * waited before being retried. + * Default retry algorithm for the matrix scheduler. Retries events up to 4 times with exponential backoff. * @param attempts - Number of attempts that have been made, including the one that just failed (ie. starting at 1) * @see retryAlgorithm */ // eslint-disable-next-line @typescript-eslint/naming-convention public static RETRY_BACKOFF_RATELIMIT(event: MatrixEvent | null, attempts: number, err: MatrixError): number { - if (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401) { - // client error; no amount of retrying with save you now. 
return -1;
- }
- if (err instanceof ConnectionError) {
- return -1;
- }
-
- // if event that we are trying to send is too large in any way then retrying won't help
- if (err.name === "M_TOO_LARGE") {
- return -1;
- }
-
- if (err.name === "M_LIMIT_EXCEEDED") {
- const waitTime = err.data.retry_after_ms;
- if (waitTime > 0) {
- return waitTime;
- }
- }
- if (attempts > 4) {
- return -1; // give up
- }
- return 1000 * Math.pow(2, attempts);
+ return calculateRetryBackoff(err, attempts, false);
}

/**

From f57912af01f5c781cec3414b64c2f6ee89fac62f Mon Sep 17 00:00:00 2001
From: Valere
Date: Mon, 26 Feb 2024 14:39:29 +0100
Subject: [PATCH 8/8] Code review, cleaning and doc

---
 src/http-api/utils.ts | 46 +++++++++++++++
 src/request-retry-utils.ts | 63 ---------------------
 src/rust-crypto/OutgoingRequestProcessor.ts | 4 +-
 src/scheduler.ts | 3 +-
 4 files changed, 48 insertions(+), 68 deletions(-)
 delete mode 100644 src/request-retry-utils.ts

diff --git a/src/http-api/utils.ts b/src/http-api/utils.ts
index c49be740ef6..0b3c3554ffa 100644
--- a/src/http-api/utils.ts
+++ b/src/http-api/utils.ts
@@ -151,3 +151,49 @@ export async function retryNetworkOperation(maxAttempts: number, callback: ()
}
throw lastConnectionError;
}
+
+/**
+ * Calculate the backoff time for a request retry attempt.
+ * This produces wait times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the
+ * failure was due to a rate limited request, the time specified in the error is returned.
+ *
+ * Returns -1 if the error is not retryable, or if we reach the maximum number of attempts.
+ *
+ * @param err - The error thrown by the http call
+ * @param attempts - The number of attempts made so far, including the one that just failed.
+ * @param retryConnectionError - Whether to retry on {@link ConnectionError} (CORS, connection is down, etc.)
+ */
+export function calculateRetryBackoff(err: any, attempts: number, retryConnectionError: boolean): number {
+ if (attempts > 4) {
+ return -1; // give up
+ }
+
+ if (err instanceof ConnectionError && !retryConnectionError) {
+ return -1;
+ }
+
+ if (err.httpStatus && (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401)) {
+ // client error; no amount of retrying will save you now.
+ return -1;
+ }
+
+ if (err.name === "AbortError") {
+ // This is a client timeout (AbortError); the default timeout is already very high (60s/80s),
+ // so we do not retry, as another attempt could block for just as long.
+ return -1;
+ }
+
+ // If we are trying to send an event (or similar) that is too large in any way, then retrying won't help
+ if (err.name === "M_TOO_LARGE") {
+ return -1;
+ }
+
+ if (err.name === "M_LIMIT_EXCEEDED") {
+ const waitTime = err.data.retry_after_ms;
+ if (waitTime > 0) {
+ return waitTime;
+ }
+ }
+
+ return 1000 * Math.pow(2, attempts);
+}
diff --git a/src/request-retry-utils.ts b/src/request-retry-utils.ts
deleted file mode 100644
index 1ec2940dede..00000000000
--- a/src/request-retry-utils.ts
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-Copyright 2024 The Matrix.org Foundation C.I.C.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -import { ConnectionError } from "./http-api"; - -/** - * Retries events up to 4 times (so 5 including initial call) using exponential backoff. - * This produces wait times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the - * failure was due to a rate limited request, the time specified in the error is returned. - * - * Returns -1 if the error is not retryable, or if we reach the maximum number of attempts. - * - * @param err - The error thrown by the http call - * @param attempts - The current number of attempts - * @param retryConnectionError - Whether to retry on {@link ConnectionError} (CORS, connection is down, etc.) - */ -export function calculateRetryBackoff(err: any, attempts: number, retryConnectionError: boolean = false): number { - if (attempts > 4) { - return -1; // give up - } - - if (err instanceof ConnectionError && !retryConnectionError) { - return -1; - } - - if (err.httpStatus && (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401)) { - // client error; no amount of retrying with save you now. - return -1; - } - - if (err.name === "AbortError") { - // this is a client timeout, that is already very high 60s/80s - // we don't want to retry, as it could do it for very long - return -1; - } - - // if event that we are trying to send is too large in any way then retrying won't help - if (err.name === "M_TOO_LARGE") { - return -1; - } - - if (err.name === "M_LIMIT_EXCEEDED") { - const waitTime = err.data.retry_after_ms; - if (waitTime > 0) { - return waitTime; - } - } - - return 1000 * Math.pow(2, attempts); -} diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 26c6124006f..8e7e15584d5 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -27,12 +27,11 @@ import { } from "@matrix-org/matrix-sdk-crypto-wasm"; import { logger } from "../logger"; -import { IHttpOpts, MatrixHttpApi, Method } from "../http-api"; +import { calculateRetryBackoff, IHttpOpts, MatrixHttpApi, Method } from "../http-api"; import { logDuration, QueryDict, sleep } from "../utils"; import { IAuthDict, UIAuthCallback } from "../interactive-auth"; import { UIAResponse } from "../@types/uia"; import { ToDeviceMessageId } from "../@types/event"; -import { calculateRetryBackoff } from "../request-retry-utils"; /** * Common interface for all the request types returned by `OlmMachine.outgoingRequests`. @@ -206,7 +205,6 @@ export class OutgoingRequestProcessor { } // wait for the specified time and then retry the request await sleep(backoff); - // continue the loop and retry the request } } } diff --git a/src/scheduler.ts b/src/scheduler.ts index 557abe84a45..6dfd212c72c 100644 --- a/src/scheduler.ts +++ b/src/scheduler.ts @@ -22,9 +22,8 @@ import { logger } from "./logger"; import { MatrixEvent } from "./models/event"; import { EventType } from "./@types/event"; import { defer, IDeferred, removeElement } from "./utils"; -import { MatrixError } from "./http-api"; +import { calculateRetryBackoff, MatrixError } from "./http-api"; import { ISendEventResponse } from "./@types/requests"; -import { calculateRetryBackoff } from "./request-retry-utils"; const DEBUG = false; // set true to enable console logging.
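
Note for reviewers: below is a minimal sketch of how the shared backoff policy is meant to be driven, mirroring the `requestWithRetry` loop in OutgoingRequestProcessor. It assumes the final layout of PATCH 8 (`calculateRetryBackoff` exported from `src/http-api`, `sleep` from `src/utils`); the `withRetries` helper itself is hypothetical and only for illustration.

    import { calculateRetryBackoff } from "./http-api";
    import { sleep } from "./utils";

    // Hypothetical driver for the shared retry policy (not part of the patch).
    async function withRetries<T>(doRequest: () => Promise<T>): Promise<T> {
        let attempts = 0;
        // eslint-disable-next-line no-constant-condition
        while (true) {
            try {
                return await doRequest();
            } catch (e) {
                attempts++;
                // -1 means "do not retry": a 400/401/403 client error, an AbortError
                // (client timeout), M_TOO_LARGE, or more than 4 failed attempts.
                const backoff = calculateRetryBackoff(e, attempts, true);
                if (backoff < 0) throw e;
                // Otherwise wait 2s, 4s, 8s, then 16s (or the server-supplied
                // retry_after_ms on M_LIMIT_EXCEEDED) before trying again.
                await sleep(backoff);
            }
        }
    }

Passing `retryConnectionError: true` matches the rust-crypto behaviour ("Failed to fetch" errors are retried), while the scheduler's RETRY_BACKOFF_RATELIMIT passes `false`, so connection errors there still fail fast.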