From c2126a6f5d3960e467360bdc1a42ed629b1a3208 Mon Sep 17 00:00:00 2001 From: Valere Date: Fri, 9 Feb 2024 19:07:42 +0100 Subject: [PATCH 1/8] Add basic retry for outgoing requests --- .../OutgoingRequestProcessor.spec.ts | 237 +++++++++++++++++- src/rust-crypto/OutgoingRequestProcessor.ts | 89 ++++++- 2 files changed, 313 insertions(+), 13 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 3501ec8faed..081cc866145 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -27,8 +27,9 @@ import { UploadSigningKeysRequest, ToDeviceRequest, } from "@matrix-org/matrix-sdk-crypto-wasm"; +import fetchMock from "fetch-mock-jest"; -import { TypedEventEmitter } from "../../../src/models/typed-event-emitter"; +import { TypedEventEmitter } from "../../../src"; import { HttpApiEvent, HttpApiEventHandlerMap, IHttpOpts, MatrixHttpApi, UIAuthCallback } from "../../../src"; import { OutgoingRequestProcessor } from "../../../src/rust-crypto/OutgoingRequestProcessor"; import { defer } from "../../../src/utils"; @@ -274,4 +275,238 @@ describe("OutgoingRequestProcessor", () => { // ... and `makeOutgoingRequest` resolves satisfactorily await result; }); + + describe("Should retry requests", () => { + beforeEach(() => { + jest.useFakeTimers(); + + // here we use another httpApi instance in order to use fetchMock + const dummyEventEmitter = new TypedEventEmitter(); + const httpApi = new MatrixHttpApi(dummyEventEmitter, { + baseUrl: "https://example.com", + prefix: "/_matrix", + onlyData: true, + }); + + processor = new OutgoingRequestProcessor(olmMachine, httpApi); + }); + + afterEach(() => { + jest.useRealTimers(); + fetchMock.reset(); + }); + + describe("Should retry on retryable errors", () => { + const retryableErrors: Array<[number, { status: number; body: { error: string } }]> = [ + [429, { status: 429, body: { error: "Too Many Requests" } }], + [500, { status: 500, body: { error: "Internal Server Error" } }], + [502, { status: 502, body: { error: "Bad Gateway" } }], + [503, { status: 503, body: { error: "Service Unavailable" } }], + [504, { status: 504, body: { error: "Gateway Timeout" } }], + [525, { status: 525, body: { error: "SSL Handshake Failed" } }], + ]; + describe.each(retryableErrors)(`When status code is %s`, (_, error) => { + test.each(tests)(`for request of type %ss`, async (_, RequestClass, expectedMethod, expectedPath) => { + // first, mock up a request as we might expect to receive it from the Rust layer ... 
+ const testBody = '{ "foo": "bar" }'; + const outgoingRequest = new RequestClass("1234", testBody); + + // @ts-ignore to avoid having to do if else to switch the method (.put/.post) + fetchMock[expectedMethod.toLowerCase()](expectedPath, error); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + // Run all timers and wait for the request promise to resolve/reject + await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]); + + await expect(requestPromise).rejects.toThrow(); + + // Should have ultimately made 4 requests (1 initial + 3 retries) + const calls = fetchMock.calls(expectedPath); + expect(calls).toHaveLength(4); + }); + }); + }); + + it("should retry on Failed to fetch connection errors", async () => { + let callCount = 0; + fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + throw new Error("Failed to fetch"); + } + }); + + const outgoingRequest = new KeysUploadRequest("1234", "{}"); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(2); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + + it("should retry to send to-device", async () => { + let callCount = 0; + const testBody = '{ "messages": { "user": {"device": "bar" }}}'; + const outgoingRequest = new ToDeviceRequest("1234", "custom.type", "12345", testBody); + + fetchMock.put("express:/_matrix/client/v3/sendToDevice/:type/:txnId", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + throw new Error("Failed to fetch"); + } + }); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("express:/_matrix/client/v3/sendToDevice/:type/:txnId"); + expect(calls).toHaveLength(2); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + + it("should retry to call with UIA", async () => { + let callCount = 0; + const testBody = '{ "foo": "bar" }'; + const outgoingRequest = new UploadSigningKeysRequest(testBody); + + fetchMock.post("path:/_matrix/client/v3/keys/device_signing/upload", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + throw new Error("Failed to fetch"); + } + }); + const authCallback: UIAuthCallback = async (makeRequest) => { + return await makeRequest({ type: "test" }); + }; + const requestPromise = processor.makeOutgoingRequest(outgoingRequest, authCallback); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/device_signing/upload"); + expect(calls).toHaveLength(2); + // Will not mark as sent as it's a UIA request + }); + + it("should retry on respect server cool down on LIMIT_EXCEEDED", async () => { + const retryAfterMs = 5000; + let callCount = 0; + + fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { + callCount++; + if (callCount == 2) { + return { + status: 200, + body: "{}", + }; + } else { + return { + status: 429, + body: { + errcode: "M_LIMIT_EXCEEDED", + error: "Too many requests", + retry_after_ms: retryAfterMs, + }, + }; + } + }); + + const outgoingRequest = new KeysUploadRequest("1234", 
"{}"); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + // advanced by less than the retryAfterMs + await jest.advanceTimersByTimeAsync(retryAfterMs - 1000); + + // should not have made a second request yet + { + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(1); + } + + // advanced by the remaining time + await jest.advanceTimersByTimeAsync(retryAfterMs + 1000); + + await requestPromise; + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(2); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + + describe("Should not retry all sort of errors", () => { + test.each(tests)("for %ss", async (_, RequestClass, expectedMethod, expectedPath) => { + const testBody = '{ "foo": "bar" }'; + const outgoingRequest = new RequestClass("1234", testBody); + + // @ts-ignore to avoid having to do if else to switch the method (.put/.post) + fetchMock[expectedMethod.toLowerCase()](expectedPath, { + status: 401, + body: { + errcode: "M_UNKNOWN_TOKEN", + }, + }); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + // Run all timers and wait for the request promise to resolve/reject + await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]); + + await expect(requestPromise).rejects.toThrow(); + + // Should have ultimately made 4 requests (1 initial + 3 retries) + const calls = fetchMock.calls(expectedPath); + expect(calls).toHaveLength(1); + }); + }); + + describe("Should retry until it works", () => { + it.each([1, 2, 3, 4])("should succeed if the call number %s is ok", async (successfulCall) => { + let callCount = 0; + fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { + callCount++; + if (callCount == successfulCall) { + return { + status: 200, + body: "{}", + }; + } else { + return { + status: 500, + body: { error: "Internal server error" }, + }; + } + }); + + const outgoingRequest = new KeysUploadRequest("1234", "{}"); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise, jest.runAllTimersAsync()]); + + const calls = fetchMock.calls("path:/_matrix/client/v3/keys/upload"); + expect(calls).toHaveLength(successfulCall); + expect(olmMachine.markRequestAsSent).toHaveBeenCalled(); + }); + }); + }); }); diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 18b9d6b03fa..a82d329653a 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -15,11 +15,11 @@ limitations under the License. */ import { - OlmMachine, KeysBackupRequest, KeysClaimRequest, KeysQueryRequest, KeysUploadRequest, + OlmMachine, RoomMessageRequest, SignatureUploadRequest, ToDeviceRequest, @@ -27,8 +27,8 @@ import { } from "@matrix-org/matrix-sdk-crypto-wasm"; import { logger } from "../logger"; -import { IHttpOpts, MatrixHttpApi, Method } from "../http-api"; -import { logDuration, QueryDict } from "../utils"; +import { ConnectionError, IHttpOpts, MatrixError, MatrixHttpApi, Method } from "../http-api"; +import { logDuration, QueryDict, sleep } from "../utils"; import { IAuthDict, UIAuthCallback } from "../interactive-auth"; import { UIAResponse } from "../@types/uia"; import { ToDeviceMessageId } from "../@types/event"; @@ -43,6 +43,11 @@ export interface OutgoingRequest { readonly type: number; } +// A list of HTTP status codes that we should retry on. 
+const retryableHttpStatuses = [429, 500, 502, 503, 504, 525]; +// The default delay to wait before retrying a request. +const DEFAULT_RETRY_DELAY_MS = 1000; + /** * OutgoingRequestManager: turns `OutgoingRequest`s from the rust sdk into HTTP requests * @@ -71,15 +76,15 @@ export class OutgoingRequestProcessor { * for the complete list of request types */ if (msg instanceof KeysUploadRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); } else if (msg instanceof KeysQueryRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); } else if (msg instanceof KeysClaimRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); } else if (msg instanceof SignatureUploadRequest) { - resp = await this.rawJsonRequest(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); + resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); } else if (msg instanceof KeysBackupRequest) { - resp = await this.rawJsonRequest( + resp = await this.fetchWithRetry( Method.Put, "/_matrix/client/v3/room_keys/keys", { version: msg.version }, @@ -91,7 +96,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/rooms/${encodeURIComponent(msg.room_id)}/send/` + `${encodeURIComponent(msg.event_type)}/${encodeURIComponent(msg.txn_id)}`; - resp = await this.rawJsonRequest(Method.Put, path, {}, msg.body); + resp = await this.fetchWithRetry(Method.Put, path, {}, msg.body); } else if (msg instanceof UploadSigningKeysRequest) { await this.makeRequestWithUIA( Method.Post, @@ -154,7 +159,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/sendToDevice/${encodeURIComponent(request.event_type)}/` + encodeURIComponent(request.txn_id); - return await this.rawJsonRequest(Method.Put, path, {}, request.body); + return await this.fetchWithRetry(Method.Put, path, {}, request.body); } private async makeRequestWithUIA( @@ -165,7 +170,7 @@ export class OutgoingRequestProcessor { uiaCallback: UIAuthCallback | undefined, ): Promise { if (!uiaCallback) { - return await this.rawJsonRequest(method, path, queryParams, body); + return await this.fetchWithRetry(method, path, queryParams, body); } const parsedBody = JSON.parse(body); @@ -176,7 +181,7 @@ export class OutgoingRequestProcessor { if (auth !== null) { newBody.auth = auth; } - const resp = await this.rawJsonRequest(method, path, queryParams, JSON.stringify(newBody)); + const resp = await this.fetchWithRetry(method, path, queryParams, JSON.stringify(newBody)); return JSON.parse(resp) as T; }; @@ -201,4 +206,64 @@ export class OutgoingRequestProcessor { return await this.http.authedRequest(method, path, queryParams, body, opts); } + + private async fetchWithRetry( + method: Method, + path: string, + queryParams: QueryDict, + body: string, + maxRetryCount: number = 3, + ): Promise { + let currentRetryCount = 0; + + // eslint-disable-next-line no-constant-condition + while (true) { + try { + return await this.rawJsonRequest(method, path, queryParams, body); + } catch (e) { + if (currentRetryCount >= maxRetryCount) { + // Max number of retries reached, 
rethrow the error + throw e; + } + + currentRetryCount++; + + const maybeRetryAfter = this.shouldWaitBeforeRetryingMillis(e); + if (maybeRetryAfter) { + // wait for the specified time and then retry the request + await sleep(maybeRetryAfter); + // continue the loop and retry the request + } else { + throw e; + } + } + } + } + + /** + * Determine if a given error should be retried, and if so, how long to wait before retrying. + * If the error should not be retried, returns undefined. + * + * @param e the error returned by the http stack + * @private + */ + private shouldWaitBeforeRetryingMillis(e: any): number | undefined { + if (e instanceof MatrixError) { + // On rate limited errors, we should retry after the rate limit has expired. + if (e.errcode === "M_LIMIT_EXCEEDED") { + return e.data.retry_after_ms ?? DEFAULT_RETRY_DELAY_MS; + } + } + + if (e.httpStatus && retryableHttpStatuses.includes(e.httpStatus)) { + return DEFAULT_RETRY_DELAY_MS; + } + + if (e instanceof ConnectionError) { + return DEFAULT_RETRY_DELAY_MS; + } + + // don't retry + return; + } } From c112dcfa45c157098ccc4a8e0587ebfd0ce76671 Mon Sep 17 00:00:00 2001 From: Valere Date: Mon, 12 Feb 2024 09:07:18 +0100 Subject: [PATCH 2/8] Update doc --- src/rust-crypto/OutgoingRequestProcessor.ts | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index a82d329653a..89558a923c1 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -44,7 +44,23 @@ export interface OutgoingRequest { } // A list of HTTP status codes that we should retry on. -const retryableHttpStatuses = [429, 500, 502, 503, 504, 525]; +// These status codes represent server errors or rate limiting issues. +// Retrying the request after a delay might succeed when the server issue +// is resolved or when the rate limit is reset. +const retryableHttpStatuses = [ + // Too Many Requests + 429, + // Internal Server Error + 500, + // Bad Gateway + 502, + // Service Unavailable (overloaded or down for maintenance) + 503, + // Gateway Timeout + 504, + // SSL Handshake Failed + 525, +]; // The default delay to wait before retrying a request. const DEFAULT_RETRY_DELAY_MS = 1000; @@ -244,8 +260,7 @@ export class OutgoingRequestProcessor { * Determine if a given error should be retried, and if so, how long to wait before retrying. * If the error should not be retried, returns undefined. 
* - * @param e the error returned by the http stack - * @private + * @param e - the error returned by the http stack */ private shouldWaitBeforeRetryingMillis(e: any): number | undefined { if (e instanceof MatrixError) { From 735f2ee577202a381acd02679778c5ebf2e41219 Mon Sep 17 00:00:00 2001 From: Valere Date: Mon, 12 Feb 2024 13:45:04 +0100 Subject: [PATCH 3/8] Remove 504 from retryable --- spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts | 1 - src/rust-crypto/OutgoingRequestProcessor.ts | 2 -- 2 files changed, 3 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 081cc866145..8ed794382de 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -302,7 +302,6 @@ describe("OutgoingRequestProcessor", () => { [500, { status: 500, body: { error: "Internal Server Error" } }], [502, { status: 502, body: { error: "Bad Gateway" } }], [503, { status: 503, body: { error: "Service Unavailable" } }], - [504, { status: 504, body: { error: "Gateway Timeout" } }], [525, { status: 525, body: { error: "SSL Handshake Failed" } }], ]; describe.each(retryableErrors)(`When status code is %s`, (_, error) => { diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 89558a923c1..03b9320ddb8 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -56,8 +56,6 @@ const retryableHttpStatuses = [ 502, // Service Unavailable (overloaded or down for maintenance) 503, - // Gateway Timeout - 504, // SSL Handshake Failed 525, ]; From afa6a0901c01cc53288dff9fed924ed31d45ff68 Mon Sep 17 00:00:00 2001 From: Valere Date: Tue, 13 Feb 2024 13:45:44 +0100 Subject: [PATCH 4/8] Retry all 5xx and clarify client timeouts --- .../OutgoingRequestProcessor.spec.ts | 52 +++++++++++++++++-- src/rust-crypto/OutgoingRequestProcessor.ts | 39 ++++++++------ 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 8ed794382de..409387d3009 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -302,6 +302,14 @@ describe("OutgoingRequestProcessor", () => { [500, { status: 500, body: { error: "Internal Server Error" } }], [502, { status: 502, body: { error: "Bad Gateway" } }], [503, { status: 503, body: { error: "Service Unavailable" } }], + [504, { status: 504, body: { error: "Gateway timeout" } }], + [504, { status: 504, body: { error: "Gateway Timeout" } }], + [505, { status: 505, body: { error: "HTTP Version Not Supported" } }], + [506, { status: 506, body: { error: "Variant Also Negotiates" } }], + [507, { status: 507, body: { error: "Insufficient Storage" } }], + [508, { status: 508, body: { error: "Loop Detected" } }], + [510, { status: 510, body: { error: "Not Extended" } }], + [511, { status: 511, body: { error: "Network Authentication Required" } }], [525, { status: 525, body: { error: "SSL Handshake Failed" } }], ]; describe.each(retryableErrors)(`When status code is %s`, (_, error) => { @@ -323,6 +331,9 @@ describe("OutgoingRequestProcessor", () => { // Should have ultimately made 4 requests (1 initial + 3 retries) const calls = fetchMock.calls(expectedPath); expect(calls).toHaveLength(4); + + // The promise should have been rejected + await 
expect(requestPromise).rejects.toThrow();
});
});
});
@@ -452,17 +463,47 @@ describe("OutgoingRequestProcessor", () => {
expect(olmMachine.markRequestAsSent).toHaveBeenCalled();
});
- describe("Should not retry all sorts of errors", () => {
+ const nonRetryableErrors: Array<[number, { status: number; body: { errcode: string } }]> = [
+ [400, { status: 400, body: { errcode: "Bad Request" } }],
+ [401, { status: 401, body: { errcode: "M_UNKNOWN_TOKEN" } }],
+ [403, { status: 403, body: { errcode: "M_FORBIDDEN" } }],
+ ];
+
+ describe.each(nonRetryableErrors)("Should not retry all sorts of errors", (_, error) => {
test.each(tests)("for %ss", async (_, RequestClass, expectedMethod, expectedPath) => {
const testBody = '{ "foo": "bar" }';
const outgoingRequest = new RequestClass("1234", testBody);

// @ts-ignore to avoid having to do if else to switch the method (.put/.post)
fetchMock[expectedMethod.toLowerCase()](expectedPath, {
- status: 401,
- body: {
- errcode: "M_UNKNOWN_TOKEN",
- },
+ status: error.status,
+ body: error.body,
});

const requestPromise = processor.makeOutgoingRequest(outgoingRequest);

// Run all timers and wait for the request promise to resolve/reject
await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]);

await expect(requestPromise).rejects.toThrow();

// Should have ultimately made 4 requests (1 initial + 3 retries)
const calls = fetchMock.calls(expectedPath);
expect(calls).toHaveLength(1);
+
+ await expect(requestPromise).rejects.toThrow();
});
});
+
+ describe("Should not retry client timeouts", () => {
+ test.each(tests)("for %ss", async (_, RequestClass, expectedMethod, expectedPath) => {
+ const testBody = '{ "foo": "bar" }';
+ const outgoingRequest = new RequestClass("1234", testBody);
+
+ // @ts-ignore to avoid having to do if else to switch the method (.put/.post)
+ fetchMock[expectedMethod.toLowerCase()](expectedPath, () => {
+ // This is what a client timeout error will throw
+ throw new DOMException("The user aborted a request.", "AbortError");
+ });

const requestPromise = processor.makeOutgoingRequest(outgoingRequest);

// Run all timers and wait for the request promise to resolve/reject
await Promise.all([jest.runAllTimersAsync(), requestPromise.catch(() => {})]);

await expect(requestPromise).rejects.toThrow();

// Should have ultimately made 4 requests (1 initial + 3 retries)
const calls = fetchMock.calls(expectedPath);
expect(calls).toHaveLength(1);
+ await expect(requestPromise).rejects.toThrow();
});
});
diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts
index 03b9320ddb8..c7520728f1b 100644
--- a/src/rust-crypto/OutgoingRequestProcessor.ts
+++ b/src/rust-crypto/OutgoingRequestProcessor.ts
@@ -43,22 +43,6 @@ export interface OutgoingRequest {
readonly type: number;
}

-// A list of HTTP status codes that we should retry on.
-// These status codes represent server errors or rate limiting issues.
-// Retrying the request after a delay might succeed when the server issue
-// is resolved or when the rate limit is reset.
-const retryableHttpStatuses = [
- // Too Many Requests
- 429,
- // Internal Server Error
- 500,
- // Bad Gateway
- 502,
- // Service Unavailable (overloaded or down for maintenance)
- 503,
- // SSL Handshake Failed
- 525,
-];
// The default delay to wait before retrying a request.
const DEFAULT_RETRY_DELAY_MS = 1000;

@@ -254,6 +238,24 @@ export class OutgoingRequestProcessor {
}
}

+ /**
+ * Returns true if the request should be retried, false otherwise.
+ * + * Retrying the request after a delay might succeed when the server issue + * is resolved or when the rate limit is reset. + * @param httpStatus - the HTTP status code of the response + */ + private canRetry(httpStatus: number): boolean { + // Too Many Requests + if (httpStatus === 429) return true; + + // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) + // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. + // All keys/signatures uploads are, message and to device are because of txn_id, keys claim in worst case will claim + // several keys but won't cause harm. + return httpStatus >= 500 && httpStatus < 600; + } + /** * Determine if a given error should be retried, and if so, how long to wait before retrying. * If the error should not be retried, returns undefined. @@ -268,10 +270,13 @@ export class OutgoingRequestProcessor { } } - if (e.httpStatus && retryableHttpStatuses.includes(e.httpStatus)) { + if (e.httpStatus && this.canRetry(e.httpStatus)) { return DEFAULT_RETRY_DELAY_MS; } + // Notice that client timeout errors are not ConnectionErrors, they would be `AbortError`. + // Client timeout (AbortError) errors are not retried, the default timout is already + // very high (using browser defaults e.g. 300 or 90 seconds). if (e instanceof ConnectionError) { return DEFAULT_RETRY_DELAY_MS; } From 5a707e1073f04b443e843aaa93e71030d7c7712b Mon Sep 17 00:00:00 2001 From: Valere Date: Fri, 23 Feb 2024 11:17:54 +0100 Subject: [PATCH 5/8] code review cleaning --- .../OutgoingRequestProcessor.spec.ts | 8 +- src/rust-crypto/OutgoingRequestProcessor.ts | 107 +++++++++--------- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 409387d3009..8671dcd0545 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -303,7 +303,6 @@ describe("OutgoingRequestProcessor", () => { [502, { status: 502, body: { error: "Bad Gateway" } }], [503, { status: 503, body: { error: "Service Unavailable" } }], [504, { status: 504, body: { error: "Gateway timeout" } }], - [504, { status: 504, body: { error: "Gateway Timeout" } }], [505, { status: 505, body: { error: "HTTP Version Not Supported" } }], [506, { status: 506, body: { error: "Variant Also Negotiates" } }], [507, { status: 507, body: { error: "Insufficient Storage" } }], @@ -318,8 +317,7 @@ describe("OutgoingRequestProcessor", () => { const testBody = '{ "foo": "bar" }'; const outgoingRequest = new RequestClass("1234", testBody); - // @ts-ignore to avoid having to do if else to switch the method (.put/.post) - fetchMock[expectedMethod.toLowerCase()](expectedPath, error); + fetchMock.mock(expectedPath, error, { method: expectedMethod }); const requestPromise = processor.makeOutgoingRequest(outgoingRequest); @@ -487,7 +485,7 @@ describe("OutgoingRequestProcessor", () => { await expect(requestPromise).rejects.toThrow(); - // Should have ultimately made 4 requests (1 initial + 3 retries) + // Should have only tried once const calls = fetchMock.calls(expectedPath); expect(calls).toHaveLength(1); @@ -513,7 +511,7 @@ describe("OutgoingRequestProcessor", () => { await expect(requestPromise).rejects.toThrow(); - // Should have ultimately made 4 requests (1 initial + 3 retries) + // Should have only tried once const calls = fetchMock.calls(expectedPath); expect(calls).toHaveLength(1); await 
expect(requestPromise).rejects.toThrow(); diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index c7520728f1b..73f611ded63 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -46,6 +46,9 @@ export interface OutgoingRequest { // The default delay to wait before retrying a request. const DEFAULT_RETRY_DELAY_MS = 1000; +// The http request will be retried at most 4 times if the error is retryable. +const MAX_REQUEST_RETRY_COUNT = 3; + /** * OutgoingRequestManager: turns `OutgoingRequest`s from the rust sdk into HTTP requests * @@ -74,15 +77,15 @@ export class OutgoingRequestProcessor { * for the complete list of request types */ if (msg instanceof KeysUploadRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/upload", {}, msg.body); } else if (msg instanceof KeysQueryRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/query", {}, msg.body); } else if (msg instanceof KeysClaimRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/claim", {}, msg.body); } else if (msg instanceof SignatureUploadRequest) { - resp = await this.fetchWithRetry(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); + resp = await this.requestWithRetry(Method.Post, "/_matrix/client/v3/keys/signatures/upload", {}, msg.body); } else if (msg instanceof KeysBackupRequest) { - resp = await this.fetchWithRetry( + resp = await this.requestWithRetry( Method.Put, "/_matrix/client/v3/room_keys/keys", { version: msg.version }, @@ -94,7 +97,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/rooms/${encodeURIComponent(msg.room_id)}/send/` + `${encodeURIComponent(msg.event_type)}/${encodeURIComponent(msg.txn_id)}`; - resp = await this.fetchWithRetry(Method.Put, path, {}, msg.body); + resp = await this.requestWithRetry(Method.Put, path, {}, msg.body); } else if (msg instanceof UploadSigningKeysRequest) { await this.makeRequestWithUIA( Method.Post, @@ -157,7 +160,7 @@ export class OutgoingRequestProcessor { const path = `/_matrix/client/v3/sendToDevice/${encodeURIComponent(request.event_type)}/` + encodeURIComponent(request.txn_id); - return await this.fetchWithRetry(Method.Put, path, {}, request.body); + return await this.requestWithRetry(Method.Put, path, {}, request.body); } private async makeRequestWithUIA( @@ -168,7 +171,7 @@ export class OutgoingRequestProcessor { uiaCallback: UIAuthCallback | undefined, ): Promise { if (!uiaCallback) { - return await this.fetchWithRetry(method, path, queryParams, body); + return await this.requestWithRetry(method, path, queryParams, body); } const parsedBody = JSON.parse(body); @@ -179,7 +182,7 @@ export class OutgoingRequestProcessor { if (auth !== null) { newBody.auth = auth; } - const resp = await this.fetchWithRetry(method, path, queryParams, JSON.stringify(newBody)); + const resp = await this.requestWithRetry(method, path, queryParams, JSON.stringify(newBody)); return JSON.parse(resp) as T; }; @@ -187,30 +190,11 @@ export class OutgoingRequestProcessor { return JSON.stringify(resp); } - private async rawJsonRequest(method: Method, path: string, 
queryParams: QueryDict, body: string): Promise { - const opts = { - // inhibit the JSON stringification and parsing within HttpApi. - json: false, - - // nevertheless, we are sending, and accept, JSON. - headers: { - "Content-Type": "application/json", - "Accept": "application/json", - }, - - // we use the full prefix - prefix: "", - }; - - return await this.http.authedRequest(method, path, queryParams, body, opts); - } - - private async fetchWithRetry( + private async requestWithRetry( method: Method, path: string, queryParams: QueryDict, body: string, - maxRetryCount: number = 3, ): Promise { let currentRetryCount = 0; @@ -219,41 +203,41 @@ export class OutgoingRequestProcessor { try { return await this.rawJsonRequest(method, path, queryParams, body); } catch (e) { - if (currentRetryCount >= maxRetryCount) { + if (currentRetryCount >= MAX_REQUEST_RETRY_COUNT) { // Max number of retries reached, rethrow the error throw e; } - currentRetryCount++; - const maybeRetryAfter = this.shouldWaitBeforeRetryingMillis(e); - if (maybeRetryAfter) { - // wait for the specified time and then retry the request - await sleep(maybeRetryAfter); - // continue the loop and retry the request - } else { + if (maybeRetryAfter === undefined) { + // this error is not retryable throw e; } + + currentRetryCount++; + // wait for the specified time and then retry the request + await sleep(maybeRetryAfter); + // continue the loop and retry the request } } } - /** - * Returns true if the request should be retried, false otherwise. - * - * Retrying the request after a delay might succeed when the server issue - * is resolved or when the rate limit is reset. - * @param httpStatus - the HTTP status code of the response - */ - private canRetry(httpStatus: number): boolean { - // Too Many Requests - if (httpStatus === 429) return true; + private async rawJsonRequest(method: Method, path: string, queryParams: QueryDict, body: string): Promise { + const opts = { + // inhibit the JSON stringification and parsing within HttpApi. + json: false, - // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) - // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. - // All keys/signatures uploads are, message and to device are because of txn_id, keys claim in worst case will claim - // several keys but won't cause harm. - return httpStatus >= 500 && httpStatus < 600; + // nevertheless, we are sending, and accept, JSON. + headers: { + "Content-Type": "application/json", + "Accept": "application/json", + }, + + // we use the full prefix + prefix: "", + }; + + return await this.http.authedRequest(method, path, queryParams, body, opts); } /** @@ -284,4 +268,23 @@ export class OutgoingRequestProcessor { // don't retry return; } + + /** + * Returns true if the request should be retried, false otherwise. + * + * Retrying the request after a delay might succeed when the server issue + * is resolved or when the rate limit is reset. + * @param httpStatus - the HTTP status code of the response + */ + private canRetry(httpStatus: number): boolean { + // Too Many Requests + if (httpStatus === 429) return true; + + // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) + // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. + // * All key/signature uploads are idempotent. + // * Room message and to-device send requests are idempotent because of txn_id. 
+ // * Keys claim in worst case will claim several keys but won't cause harm. + return httpStatus >= 500 && httpStatus < 600; + } } From 01c4d0e3bba26c4a4936d06dced10f8af89bbd44 Mon Sep 17 00:00:00 2001 From: Valere Date: Fri, 23 Feb 2024 11:47:44 +0100 Subject: [PATCH 6/8] do not retry rust request if M_TOO_LARGE --- .../OutgoingRequestProcessor.spec.ts | 25 +++++++++++++++++++ src/rust-crypto/OutgoingRequestProcessor.ts | 6 +++++ 2 files changed, 31 insertions(+) diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts index 8671dcd0545..557be8070e1 100644 --- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts +++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts @@ -336,6 +336,31 @@ describe("OutgoingRequestProcessor", () => { }); }); + it("should not retry if M_TOO_LARGE", async () => { + const testBody = '{ "messages": { "user": {"device": "bar" }}}'; + const outgoingRequest = new ToDeviceRequest("1234", "custom.type", "12345", testBody); + + fetchMock.put("express:/_matrix/client/v3/sendToDevice/:type/:txnId", { + status: 502, + body: { + errcode: "M_TOO_LARGE", + error: "Request too large", + }, + }); + + const requestPromise = processor.makeOutgoingRequest(outgoingRequest); + + await Promise.all([requestPromise.catch(() => {}), jest.runAllTimersAsync()]); + + await expect(requestPromise).rejects.toThrow(); + + const calls = fetchMock.calls("express:/_matrix/client/v3/sendToDevice/:type/:txnId"); + expect(calls).toHaveLength(1); + + // The promise should have been rejected + await expect(requestPromise).rejects.toThrow(); + }); + it("should retry on Failed to fetch connection errors", async () => { let callCount = 0; fetchMock.post("path:/_matrix/client/v3/keys/upload", (url, opts) => { diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 73f611ded63..c93b53de9f0 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -252,6 +252,12 @@ export class OutgoingRequestProcessor { if (e.errcode === "M_LIMIT_EXCEEDED") { return e.data.retry_after_ms ?? DEFAULT_RETRY_DELAY_MS; } + + if (e.errcode === "M_TOO_LARGE") { + // The request was too large, we should not retry. + // Could be a 502 or 413 status code as per documentation. 
+ return undefined;
+ }
}

From 4dc469b2ee8c14cc01fd3806309e669faf1cb2a6 Mon Sep 17 00:00:00 2001
From: Valere
Date: Fri, 23 Feb 2024 13:48:20 +0100
Subject: [PATCH 7/8] refactor: use common retry algorithm between scheduler and
 rust requests

---
 .../OutgoingRequestProcessor.spec.ts | 4 +-
 src/request-retry-utils.ts | 63 +++++++++++++++
 src/rust-crypto/OutgoingRequestProcessor.ts | 79 ++-----------------
 src/scheduler.ts | 32 +-------
 4 files changed, 76 insertions(+), 102 deletions(-)
 create mode 100644 src/request-retry-utils.ts

diff --git a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts
index 557be8070e1..e63243d2913 100644
--- a/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts
+++ b/spec/unit/rust-crypto/OutgoingRequestProcessor.spec.ts
@@ -326,9 +326,9 @@ describe("OutgoingRequestProcessor", () => {

await expect(requestPromise).rejects.toThrow();

- // Should have ultimately made 4 requests (1 initial + 3 retries)
+ // Should have ultimately made 5 requests (1 initial + 4 retries)
const calls = fetchMock.calls(expectedPath);
- expect(calls).toHaveLength(4);
+ expect(calls).toHaveLength(5);

// The promise should have been rejected
await expect(requestPromise).rejects.toThrow();
diff --git a/src/request-retry-utils.ts b/src/request-retry-utils.ts
new file mode 100644
index 00000000000..1ec2940dede
--- /dev/null
+++ b/src/request-retry-utils.ts
@@ -0,0 +1,63 @@
+/*
+Copyright 2024 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import { ConnectionError } from "./http-api";
+
+/**
+ * Retries events up to 4 times (so 5 including initial call) using exponential backoff.
+ * This produces wait times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the
+ * failure was due to a rate limited request, the time specified in the error is returned.
+ *
+ * Returns -1 if the error is not retryable, or if we reach the maximum number of attempts.
+ *
+ * @param err - The error thrown by the http call
+ * @param attempts - The current number of attempts
+ * @param retryConnectionError - Whether to retry on {@link ConnectionError} (CORS, connection is down, etc.)
+ */
+export function calculateRetryBackoff(err: any, attempts: number, retryConnectionError: boolean = false): number {
+ if (attempts > 4) {
+ return -1; // give up
+ }
+
+ if (err instanceof ConnectionError && !retryConnectionError) {
+ return -1;
+ }
+
+ if (err.httpStatus && (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401)) {
+ // client error; no amount of retrying with save you now.
+ return -1; + } + + if (err.name === "AbortError") { + // this is a client timeout, that is already very high 60s/80s + // we don't want to retry, as it could do it for very long + return -1; + } + + // if event that we are trying to send is too large in any way then retrying won't help + if (err.name === "M_TOO_LARGE") { + return -1; + } + + if (err.name === "M_LIMIT_EXCEEDED") { + const waitTime = err.data.retry_after_ms; + if (waitTime > 0) { + return waitTime; + } + } + + return 1000 * Math.pow(2, attempts); +} diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index c93b53de9f0..26c6124006f 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -27,11 +27,12 @@ import { } from "@matrix-org/matrix-sdk-crypto-wasm"; import { logger } from "../logger"; -import { ConnectionError, IHttpOpts, MatrixError, MatrixHttpApi, Method } from "../http-api"; +import { IHttpOpts, MatrixHttpApi, Method } from "../http-api"; import { logDuration, QueryDict, sleep } from "../utils"; import { IAuthDict, UIAuthCallback } from "../interactive-auth"; import { UIAResponse } from "../@types/uia"; import { ToDeviceMessageId } from "../@types/event"; +import { calculateRetryBackoff } from "../request-retry-utils"; /** * Common interface for all the request types returned by `OlmMachine.outgoingRequests`. @@ -43,12 +44,6 @@ export interface OutgoingRequest { readonly type: number; } -// The default delay to wait before retrying a request. -const DEFAULT_RETRY_DELAY_MS = 1000; - -// The http request will be retried at most 4 times if the error is retryable. -const MAX_REQUEST_RETRY_COUNT = 3; - /** * OutgoingRequestManager: turns `OutgoingRequest`s from the rust sdk into HTTP requests * @@ -203,20 +198,14 @@ export class OutgoingRequestProcessor { try { return await this.rawJsonRequest(method, path, queryParams, body); } catch (e) { - if (currentRetryCount >= MAX_REQUEST_RETRY_COUNT) { - // Max number of retries reached, rethrow the error - throw e; - } - - const maybeRetryAfter = this.shouldWaitBeforeRetryingMillis(e); - if (maybeRetryAfter === undefined) { - // this error is not retryable + currentRetryCount++; + const backoff = calculateRetryBackoff(e, currentRetryCount, true); + if (backoff < 0) { + // Max number of retries reached, or error is not retryable. rethrow the error throw e; } - - currentRetryCount++; // wait for the specified time and then retry the request - await sleep(maybeRetryAfter); + await sleep(backoff); // continue the loop and retry the request } } @@ -239,58 +228,4 @@ export class OutgoingRequestProcessor { return await this.http.authedRequest(method, path, queryParams, body, opts); } - - /** - * Determine if a given error should be retried, and if so, how long to wait before retrying. - * If the error should not be retried, returns undefined. - * - * @param e - the error returned by the http stack - */ - private shouldWaitBeforeRetryingMillis(e: any): number | undefined { - if (e instanceof MatrixError) { - // On rate limited errors, we should retry after the rate limit has expired. - if (e.errcode === "M_LIMIT_EXCEEDED") { - return e.data.retry_after_ms ?? DEFAULT_RETRY_DELAY_MS; - } - - if (e.errcode === "M_TOO_LARGE") { - // The request was too large, we should not retry. - // Could be a 502 or 413 status code as per documentation. 
- return undefined; - } - } - - if (e.httpStatus && this.canRetry(e.httpStatus)) { - return DEFAULT_RETRY_DELAY_MS; - } - - // Notice that client timeout errors are not ConnectionErrors, they would be `AbortError`. - // Client timeout (AbortError) errors are not retried, the default timout is already - // very high (using browser defaults e.g. 300 or 90 seconds). - if (e instanceof ConnectionError) { - return DEFAULT_RETRY_DELAY_MS; - } - - // don't retry - return; - } - - /** - * Returns true if the request should be retried, false otherwise. - * - * Retrying the request after a delay might succeed when the server issue - * is resolved or when the rate limit is reset. - * @param httpStatus - the HTTP status code of the response - */ - private canRetry(httpStatus: number): boolean { - // Too Many Requests - if (httpStatus === 429) return true; - - // 5xx Errors (Bad Gateway, Service Unavailable, Internal Server Error ...) - // This includes gateway timeout (504) and it's ok because all the requests made here are idempotent. - // * All key/signature uploads are idempotent. - // * Room message and to-device send requests are idempotent because of txn_id. - // * Keys claim in worst case will claim several keys but won't cause harm. - return httpStatus >= 500 && httpStatus < 600; - } } diff --git a/src/scheduler.ts b/src/scheduler.ts index 41612f1c902..557abe84a45 100644 --- a/src/scheduler.ts +++ b/src/scheduler.ts @@ -22,8 +22,9 @@ import { logger } from "./logger"; import { MatrixEvent } from "./models/event"; import { EventType } from "./@types/event"; import { defer, IDeferred, removeElement } from "./utils"; -import { ConnectionError, MatrixError } from "./http-api"; +import { MatrixError } from "./http-api"; import { ISendEventResponse } from "./@types/requests"; +import { calculateRetryBackoff } from "./request-retry-utils"; const DEBUG = false; // set true to enable console logging. @@ -43,38 +44,13 @@ type ProcessFunction = (event: MatrixEvent) => Promise; // eslint-disable-next-line camelcase export class MatrixScheduler { /** - * Retries events up to 4 times using exponential backoff. This produces wait - * times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the - * failure was due to a rate limited request, the time specified in the error is - * waited before being retried. + * Default retry algorithm for the matrix scheduler. Retries events up to 4 times with exponential backoff. * @param attempts - Number of attempts that have been made, including the one that just failed (ie. starting at 1) * @see retryAlgorithm */ // eslint-disable-next-line @typescript-eslint/naming-convention public static RETRY_BACKOFF_RATELIMIT(event: MatrixEvent | null, attempts: number, err: MatrixError): number { - if (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401) { - // client error; no amount of retrying with save you now. 
return -1;
- }
- if (err instanceof ConnectionError) {
- return -1;
- }
-
- // if event that we are trying to send is too large in any way then retrying won't help
- if (err.name === "M_TOO_LARGE") {
- return -1;
- }
-
- if (err.name === "M_LIMIT_EXCEEDED") {
- const waitTime = err.data.retry_after_ms;
- if (waitTime > 0) {
- return waitTime;
- }
- }
- if (attempts > 4) {
- return -1; // give up
- }
- return 1000 * Math.pow(2, attempts);
+ return calculateRetryBackoff(err, attempts, false);
}

/**

From f57912af01f5c781cec3414b64c2f6ee89fac62f Mon Sep 17 00:00:00 2001
From: Valere
Date: Mon, 26 Feb 2024 14:39:29 +0100
Subject: [PATCH 8/8] Code review, cleaning and doc

---
 src/http-api/utils.ts | 46 +++++++++++++++
 src/request-retry-utils.ts | 63 ---------------------
 src/rust-crypto/OutgoingRequestProcessor.ts | 4 +-
 src/scheduler.ts | 3 +-
 4 files changed, 48 insertions(+), 68 deletions(-)
 delete mode 100644 src/request-retry-utils.ts

diff --git a/src/http-api/utils.ts b/src/http-api/utils.ts
index c49be740ef6..0b3c3554ffa 100644
--- a/src/http-api/utils.ts
+++ b/src/http-api/utils.ts
@@ -151,3 +151,49 @@ export async function retryNetworkOperation(maxAttempts: number, callback: ()
}
throw lastConnectionError;
}
+
+/**
+ * Calculate the backoff time for a request retry attempt.
+ * This produces wait times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the
+ * failure was due to a rate limited request, the time specified in the error is returned.
+ *
+ * Returns -1 if the error is not retryable, or if we reach the maximum number of attempts.
+ *
+ * @param err - The error thrown by the http call
+ * @param attempts - The number of attempts made so far, including the one that just failed.
+ * @param retryConnectionError - Whether to retry on {@link ConnectionError} (CORS, connection is down, etc.)
+ */
+export function calculateRetryBackoff(err: any, attempts: number, retryConnectionError: boolean): number {
+ if (attempts > 4) {
+ return -1; // give up
+ }
+
+ if (err instanceof ConnectionError && !retryConnectionError) {
+ return -1;
+ }
+
+ if (err.httpStatus && (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401)) {
+ // client error; no amount of retrying will save you now.
+ return -1;
+ }
+
+ if (err.name === "AbortError") {
+ // This is a client timeout (AbortError); the default timeout is already very high (60s/80s),
+ // so we do not retry, as another attempt could block for just as long.
+ return -1;
+ }
+
+ // If we are trying to send an event (or similar) that is too large in any way, then retrying won't help
+ if (err.name === "M_TOO_LARGE") {
+ return -1;
+ }
+
+ if (err.name === "M_LIMIT_EXCEEDED") {
+ const waitTime = err.data.retry_after_ms;
+ if (waitTime > 0) {
+ return waitTime;
+ }
+ }
+
+ return 1000 * Math.pow(2, attempts);
+}
diff --git a/src/request-retry-utils.ts b/src/request-retry-utils.ts
deleted file mode 100644
index 1ec2940dede..00000000000
--- a/src/request-retry-utils.ts
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-Copyright 2024 The Matrix.org Foundation C.I.C.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -import { ConnectionError } from "./http-api"; - -/** - * Retries events up to 4 times (so 5 including initial call) using exponential backoff. - * This produces wait times of 2, 4, 8, and 16 seconds (30s total) after which we give up. If the - * failure was due to a rate limited request, the time specified in the error is returned. - * - * Returns -1 if the error is not retryable, or if we reach the maximum number of attempts. - * - * @param err - The error thrown by the http call - * @param attempts - The current number of attempts - * @param retryConnectionError - Whether to retry on {@link ConnectionError} (CORS, connection is down, etc.) - */ -export function calculateRetryBackoff(err: any, attempts: number, retryConnectionError: boolean = false): number { - if (attempts > 4) { - return -1; // give up - } - - if (err instanceof ConnectionError && !retryConnectionError) { - return -1; - } - - if (err.httpStatus && (err.httpStatus === 400 || err.httpStatus === 403 || err.httpStatus === 401)) { - // client error; no amount of retrying with save you now. - return -1; - } - - if (err.name === "AbortError") { - // this is a client timeout, that is already very high 60s/80s - // we don't want to retry, as it could do it for very long - return -1; - } - - // if event that we are trying to send is too large in any way then retrying won't help - if (err.name === "M_TOO_LARGE") { - return -1; - } - - if (err.name === "M_LIMIT_EXCEEDED") { - const waitTime = err.data.retry_after_ms; - if (waitTime > 0) { - return waitTime; - } - } - - return 1000 * Math.pow(2, attempts); -} diff --git a/src/rust-crypto/OutgoingRequestProcessor.ts b/src/rust-crypto/OutgoingRequestProcessor.ts index 26c6124006f..8e7e15584d5 100644 --- a/src/rust-crypto/OutgoingRequestProcessor.ts +++ b/src/rust-crypto/OutgoingRequestProcessor.ts @@ -27,12 +27,11 @@ import { } from "@matrix-org/matrix-sdk-crypto-wasm"; import { logger } from "../logger"; -import { IHttpOpts, MatrixHttpApi, Method } from "../http-api"; +import { calculateRetryBackoff, IHttpOpts, MatrixHttpApi, Method } from "../http-api"; import { logDuration, QueryDict, sleep } from "../utils"; import { IAuthDict, UIAuthCallback } from "../interactive-auth"; import { UIAResponse } from "../@types/uia"; import { ToDeviceMessageId } from "../@types/event"; -import { calculateRetryBackoff } from "../request-retry-utils"; /** * Common interface for all the request types returned by `OlmMachine.outgoingRequests`. @@ -206,7 +205,6 @@ export class OutgoingRequestProcessor { } // wait for the specified time and then retry the request await sleep(backoff); - // continue the loop and retry the request } } } diff --git a/src/scheduler.ts b/src/scheduler.ts index 557abe84a45..6dfd212c72c 100644 --- a/src/scheduler.ts +++ b/src/scheduler.ts @@ -22,9 +22,8 @@ import { logger } from "./logger"; import { MatrixEvent } from "./models/event"; import { EventType } from "./@types/event"; import { defer, IDeferred, removeElement } from "./utils"; -import { MatrixError } from "./http-api"; +import { calculateRetryBackoff, MatrixError } from "./http-api"; import { ISendEventResponse } from "./@types/requests"; -import { calculateRetryBackoff } from "./request-retry-utils"; const DEBUG = false; // set true to enable console logging.
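
Note for reviewers: below is a minimal sketch of how the shared backoff policy is meant to be driven, mirroring the `requestWithRetry` loop in OutgoingRequestProcessor. It assumes the final layout of PATCH 8 (`calculateRetryBackoff` exported from `src/http-api`, `sleep` from `src/utils`); the `withRetries` helper itself is hypothetical and only for illustration.

    import { calculateRetryBackoff } from "./http-api";
    import { sleep } from "./utils";

    // Hypothetical driver for the shared retry policy (not part of the patch).
    async function withRetries<T>(doRequest: () => Promise<T>): Promise<T> {
        let attempts = 0;
        // eslint-disable-next-line no-constant-condition
        while (true) {
            try {
                return await doRequest();
            } catch (e) {
                attempts++;
                // -1 means "do not retry": a 400/401/403 client error, an AbortError
                // (client timeout), M_TOO_LARGE, or more than 4 failed attempts.
                const backoff = calculateRetryBackoff(e, attempts, true);
                if (backoff < 0) throw e;
                // Otherwise wait 2s, 4s, 8s, then 16s (or the server-supplied
                // retry_after_ms on M_LIMIT_EXCEEDED) before trying again.
                await sleep(backoff);
            }
        }
    }

Passing `retryConnectionError: true` matches the rust-crypto behaviour ("Failed to fetch" errors are retried), while the scheduler's RETRY_BACKOFF_RATELIMIT passes `false`, so connection errors there still fail fast.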