Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WJ-1196] Add support for S3 presign URLs to upload blobs #1918

Draft
wants to merge 38 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
3bcd2a0
Add new file section to deepwell config.
emmiegit May 5, 2024
4c48597
Add file section to configuration.
emmiegit May 6, 2024
c3b4585
Change field to seconds, not Duration.
emmiegit May 6, 2024
2013d08
Begin BlobService::upload_url().
emmiegit May 6, 2024
43c5c75
Add blob_upload table.
emmiegit May 6, 2024
1e59199
Add timestamp for partial upload pruning.
emmiegit May 6, 2024
9642ba4
Start BlobService changes for presign URL system.
emmiegit May 6, 2024
9484a86
Add file_pending migration and pending model.
emmiegit May 12, 2024
44bce20
Start upload code.
emmiegit May 12, 2024
892d751
Rename file_pending -> blob_pending.
emmiegit May 12, 2024
6c2bae4
Fix compilation.
emmiegit May 12, 2024
d3c3f98
[WIP] Start division between new and edit file uploads.
emmiegit Jun 12, 2024
46dfe30
Add created_at column to blob_pending.
emmiegit Sep 8, 2024
7219b39
Use find_by_id() instead of find().
emmiegit Sep 8, 2024
638495b
Run rustfmt.
emmiegit Sep 9, 2024
2355013
Add FileRevisionService::create_pending().
emmiegit Sep 9, 2024
904d61f
Add FileRevisionService::get_first().
emmiegit Sep 9, 2024
066a8fe
Update comment.
emmiegit Sep 9, 2024
19d44cb
Rename structs.
emmiegit Sep 9, 2024
2f3c28c
Run rustfmt.
emmiegit Sep 9, 2024
cfac455
Add proper StartFileUploadOutput struct.
emmiegit Sep 9, 2024
1d683e3
Reword column clear again.
emmiegit Sep 9, 2024
34379a3
Update comments.
emmiegit Sep 10, 2024
20308c4
Remove dead_code suppression.
emmiegit Sep 10, 2024
c4a3dac
Add TODOs for incomplete file pruning jobs.
emmiegit Sep 10, 2024
6cf5fd3
Add FileRevisionService::finish_upload().
emmiegit Sep 10, 2024
a22bd48
Improve output of finish_new_upload().
emmiegit Sep 10, 2024
65a0b6c
Delete dummy structs.
emmiegit Sep 10, 2024
0a9cbc7
Rename types.
emmiegit Sep 10, 2024
7f1c684
Merge pending jobs.
emmiegit Sep 11, 2024
064276a
Stub out edits for now.
emmiegit Sep 11, 2024
2a99341
Fix build errors.
emmiegit Sep 18, 2024
df1c682
Fix CHECK constraints.
emmiegit Sep 19, 2024
d6876e7
Implement file_upload_* API methods.
emmiegit Sep 20, 2024
fd3c497
Rename file creation structs.
emmiegit Sep 20, 2024
70fbfd1
Rename upload API methods.
emmiegit Sep 20, 2024
e1e3b75
Remove unused struct.
emmiegit Sep 20, 2024
08161de
Address warnings.
emmiegit Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions deepwell/config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,22 @@ minimum-name-bytes = 3
# Set to 0 to disable.
refill-name-change-days = 90


[file]

# The length of the randomly-generated portion of paths used for S3 presigned URLs.
#
# The value doesn't particularly matter so long as it is sufficiently long
# to avoid collisions.
#
# Just to be safe, the generation mechanism is the same as for session tokens.
presigned-path-length = 32

# How long a presigned URL lasts before expiry.
#
# The value should only be a few minutes, and no longer than 12 hours.
presigned-expiration-minutes = 5

[message]

# The maximum size of a message's subject line, in bytes.
Expand Down
15 changes: 14 additions & 1 deletion deepwell/migrations/20220906103252_deepwell.sql
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,18 @@ CREATE TABLE page_vote (
CHECK ((disabled_at IS NULL) = (disabled_by IS NULL))
);

--
-- Blobs
--

-- Manages blobs that are being uploaded by the user.
--
-- Each row describes one pending upload: the S3 path set aside for the blob
-- and the presigned URL generated for it. created_at is recorded so that
-- partial (abandoned) uploads can be pruned later.
--
-- The primary key is named pending_blob_id so that it matches the
-- blob_pending entity model and the file.pending_blob_id column.
--
-- NOTE(review): the foreign key on file.pending_blob_id must reference
-- blob_pending(pending_blob_id); confirm it does not still point at the old
-- file_pending table name.
CREATE TABLE blob_pending (
    pending_blob_id BIGSERIAL PRIMARY KEY,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),

    -- Both values must be at least two characters long.
    s3_path TEXT NOT NULL CHECK (length(s3_path) > 1),
    presign_url TEXT NOT NULL CHECK (length(presign_url) > 1)
);

--
-- Files
--
Expand Down Expand Up @@ -439,6 +451,7 @@ CREATE TABLE file (
name TEXT NOT NULL,
page_id BIGINT NOT NULL REFERENCES page(page_id),
site_id BIGINT NOT NULL REFERENCES site(site_id),
pending_blob_id BIGINT REFERENCES file_pending(pending_file_id),

UNIQUE (page_id, name, deleted_at)
);
Expand Down Expand Up @@ -514,7 +527,7 @@ CREATE TYPE message_recipient_type AS ENUM (
-- A "record" is the underlying message data, with its contents, attachments,
-- and associated metadata such as sender and recipient(s).
CREATE TABLE message_record (
external_id TEXT PRIMARY KEY,
external_id TEXT PRIMARY KEY, -- ID comes from message_draft
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
drafted_at TIMESTAMP WITH TIME ZONE NOT NULL,
retracted_at TIMESTAMP WITH TIME ZONE,
Expand Down
5 changes: 4 additions & 1 deletion deepwell/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,10 @@ async fn build_module(app_state: ServerState) -> anyhow::Result<RpcModule<Server
register!("blob_get", blob_get);

// Files
register!("file_upload", file_upload);
register!("file_create_start", file_create_start);
register!("file_create_finish", file_create_finish);
register!("file_edit_start", file_edit_start);
register!("file_edit_finish", file_edit_finish);
register!("file_get", file_get);
register!("file_edit", file_edit);
register!("file_delete", file_delete);
Expand Down
16 changes: 16 additions & 0 deletions deepwell/src/config/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ pub struct ConfigFile {
ftml: Ftml,
special_pages: SpecialPages,
user: User,
file: FileSection,
message: Message,
}

Expand Down Expand Up @@ -181,6 +182,14 @@ struct User {
minimum_name_bytes: usize,
}

/// The `[file]` section of the configuration file.
///
/// Fields are deserialized from kebab-case keys, i.e.
/// `presigned-path-length` and `presigned-expiration-minutes`.
// NOTE: Name conflict with std::fs::File
//       (hence the name `FileSection` rather than `File`).
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "kebab-case")]
struct FileSection {
/// Length of the randomly-generated path used for S3 presigned URLs.
presigned_path_length: usize,
/// How long a presigned URL lasts before expiry, in minutes.
/// This is converted to seconds when the final `Config` is built.
presigned_expiration_minutes: u32,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "kebab-case")]
struct Message {
Expand Down Expand Up @@ -303,6 +312,11 @@ impl ConfigFile {
refill_name_change_days,
minimum_name_bytes,
},
file:
FileSection {
presigned_path_length,
presigned_expiration_minutes,
},
message:
Message {
maximum_subject_bytes: maximum_message_subject_bytes,
Expand Down Expand Up @@ -424,6 +438,8 @@ impl ConfigFile {
))
},
minimum_name_bytes,
presigned_path_length,
presigned_expiry_secs: presigned_expiration_minutes * 60,
maximum_message_subject_bytes,
maximum_message_body_bytes,
maximum_message_recipients,
Expand Down
6 changes: 6 additions & 0 deletions deepwell/src/config/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,12 @@ pub struct Config {
/// Minimum length of bytes in a username.
pub minimum_name_bytes: usize,

/// Length of randomly-generated portion of S3 presigned URLs.
pub presigned_path_length: usize,

/// How long S3 presigned URLs will last before expiry.
pub presigned_expiry_secs: u32,

/// Maximum size of the subject line allowed in a direct message.
pub maximum_message_subject_bytes: usize,

Expand Down
71 changes: 54 additions & 17 deletions deepwell/src/endpoints/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
use super::prelude::*;
use crate::models::file::Model as FileModel;
use crate::models::file_revision::Model as FileRevisionModel;
use crate::services::blob::BlobService;
use crate::services::blob::{BlobMetadata, BlobService, GetBlobOutput};
use crate::services::file::{
DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetBlobOutput,
GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, RestoreFile,
RestoreFileOutput, UploadFile, UploadFileOutput,
DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, FinishFileCreation,
FinishFileCreationOutput, GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput,
RestoreFile, RestoreFileOutput, StartFileCreation, StartFileCreationOutput,
};
use crate::services::Result;
use crate::web::{Bytes, FileDetails};
Expand All @@ -40,14 +40,19 @@ pub async fn blob_get(
info!("Getting blob for S3 hash");
let hash: Bytes = params.parse()?;
let data = BlobService::get(ctx, hash.as_ref()).await?;
let metadata = BlobService::get_metadata(ctx, hash.as_ref()).await?;

let output = GetBlobOutput {
let BlobMetadata {
mime,
size,
created_at,
} = BlobService::get_metadata(ctx, hash.as_ref()).await?;

Ok(GetBlobOutput {
data,
mime: metadata.mime,
size: metadata.size,
};
Ok(output)
mime,
size,
created_at,
})
}

pub async fn file_get(
Expand Down Expand Up @@ -79,21 +84,53 @@ pub async fn file_get(
}
}

pub async fn file_upload(
/// RPC handler which begins the creation of a new file.
///
/// The parameters are parsed as a [`StartFileCreation`], the request is
/// logged, and the work is handed off to `FileService::start_new_upload()`.
///
/// # Errors
/// Fails if the parameters cannot be parsed, or if the service call
/// returns an error.
pub async fn file_create_start(
    ctx: &ServiceContext<'_>,
    params: Params<'static>,
) -> Result<StartFileCreationOutput> {
    let request: StartFileCreation = params.parse()?;

    info!(
        "Starting file upload '{}' to page ID {} in site ID {}",
        request.name, request.page_id, request.site_id,
    );

    let output = FileService::start_new_upload(ctx, request).await?;
    Ok(output)
}

pub async fn file_create_finish(
ctx: &ServiceContext<'_>,
params: Params<'static>,
) -> Result<UploadFileOutput> {
let input: UploadFile = params.parse()?;
) -> Result<FinishFileCreationOutput> {
let input: FinishFileCreation = params.parse()?;

info!(
"Uploading file '{}' ({} bytes) to page ID {} in site ID {}",
input.name,
input.data.len(),
"Finishing file upload (pending blob ID {} for file ID {} in page ID {} in site ID {}",
input.pending_blob_id,
input.file_id,
input.page_id,
input.site_id,
);

FileService::upload(ctx, input).await
FileService::finish_new_upload(ctx, input).await
}

// TODO
/// Placeholder RPC handler for starting an upload that edits an existing file.
///
/// The request parameters are currently ignored. The handler calls
/// `FileService::start_edit_upload()`, propagates any error it returns,
/// discards its result, and then hits `todo!()`.
///
/// # Panics
/// Always panics (via `todo!()`) if the service call succeeds,
/// since this method is not implemented yet.
pub async fn file_edit_start(
ctx: &ServiceContext<'_>,
_params: Params<'static>,
) -> Result<()> {
// Result is intentionally discarded, there is no output type yet.
let _ = FileService::start_edit_upload(ctx).await?;
todo!()
}

// TODO
/// Placeholder RPC handler for finishing an upload that edits an existing file.
///
/// The request parameters are currently ignored. The handler calls
/// `FileService::finish_edit_upload()`, propagates any error it returns,
/// discards its result, and then hits `todo!()`.
///
/// # Panics
/// Always panics (via `todo!()`) if the service call succeeds,
/// since this method is not implemented yet.
pub async fn file_edit_finish(
ctx: &ServiceContext<'_>,
_params: Params<'static>,
) -> Result<()> {
// Result is intentionally discarded, there is no output type yet.
let _ = FileService::finish_edit_upload(ctx).await?;
todo!()
}

pub async fn file_edit(
Expand Down
21 changes: 21 additions & 0 deletions deepwell/src/models/blob_pending.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use sea_orm::entity::prelude::*;
use serde::{Deserialize, Serialize};

/// Database entity for a row in the `blob_pending` table.
///
/// Each row represents a blob upload which has been started
/// but is not yet finished.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)]
#[sea_orm(table_name = "blob_pending")]
pub struct Model {
/// Primary key of the pending blob.
// NOTE(review): this name must match the primary key column declared
//               for `blob_pending` in the migration -- confirm.
#[sea_orm(primary_key)]
pub pending_blob_id: i64,
/// When this pending blob row was created.
pub created_at: TimeDateTimeWithTimeZone,

/// The S3 path associated with this pending upload.
#[sea_orm(column_type = "Text")]
pub s3_path: String,

/// The presigned URL associated with this pending upload.
#[sea_orm(column_type = "Text")]
pub presign_url: String,
}

/// Relations from `blob_pending` to other entities.
///
/// This enum has no variants, so no relations are declared here.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {}

// No hooks are overridden, so the trait's default behavior is used.
impl ActiveModelBehavior for ActiveModel {}
1 change: 1 addition & 0 deletions deepwell/src/models/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub struct Model {
pub name: String,
pub page_id: i64,
pub site_id: i64,
pub pending_blob_id: Option<i64>,
}

#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
Expand Down
1 change: 1 addition & 0 deletions deepwell/src/models/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
pub mod prelude;

pub mod alias;
pub mod blob_pending;
pub mod file;
pub mod file_revision;
pub mod filter;
Expand Down
8 changes: 7 additions & 1 deletion deepwell/src/services/blob/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
// Shared imports for the submodules of the blob service.
//
// Re-exports the parent services prelude, this module's structs,
// the blob constants defined in `service`, and the blob hashing helpers.
//
// NOTE(review): unused_imports is presumably allowed because not every
//               submodule uses every item re-exported here -- confirm.
#[allow(unused_imports)]
mod prelude {
pub use super::super::prelude::*;
pub use super::service::{
EMPTY_BLOB_HASH, EMPTY_BLOB_MIME, EMPTY_BLOB_TIMESTAMP, PRESIGN_DIRECTORY,
};
pub use super::structs::*;
pub use crate::hash::{blob_hash_to_hex, sha512_hash, BlobHash};
}
Expand All @@ -36,5 +39,8 @@ mod service;
mod structs;

pub use self::mime::MimeAnalyzer;
pub use self::service::BlobService;
pub use self::service::{
BlobService, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME, EMPTY_BLOB_TIMESTAMP,
PRESIGN_DIRECTORY,
};
pub use self::structs::*;
Loading
Loading