diff --git a/src/pyodide/internal/loadPackage.ts b/src/pyodide/internal/loadPackage.ts
index fb8bc2480d4..37e7d86752c 100644
--- a/src/pyodide/internal/loadPackage.ts
+++ b/src/pyodide/internal/loadPackage.ts
@@ -9,16 +9,21 @@
  * that contains all the packages ready to go.
  */
 
-import { default as LOCKFILE } from 'pyodide-internal:generated/pyodide-lock.json';
-import { WORKERD_INDEX_URL } from 'pyodide-internal:metadata';
 import {
-  SITE_PACKAGES,
+  WORKERD_INDEX_URL,
+  LOCKFILE,
   LOAD_WHEELS_FROM_R2,
+  LOAD_WHEELS_FROM_ARTIFACT_BUNDLER,
+  PACKAGES_VERSION,
+} from 'pyodide-internal:metadata';
+import {
+  SITE_PACKAGES,
   getSitePackagesPath,
 } from 'pyodide-internal:setupPackages';
 import { parseTarInfo } from 'pyodide-internal:tar';
 import { default as DiskCache } from 'pyodide-internal:disk_cache';
 import { createTarFS } from 'pyodide-internal:tarfs';
+import { default as ArtifactBundler } from 'pyodide-internal:artifacts';
 
 async function decompressArrayBuffer(
   arrBuf: ArrayBuffer
@@ -33,13 +38,25 @@ async function decompressArrayBuffer(
   }
 }
 
-async function loadBundle(requirement: string): Promise<[string, ArrayBuffer]> {
+function getFilenameOfPackage(requirement: string): string {
+  const obj = LOCKFILE['packages'][requirement];
+  if (!obj) {
+    throw new Error('Requirement ' + requirement + ' not found in lockfile');
+  }
+
+  return obj.file_name;
+}
+
+// loadBundleFromR2 loads the package from the internet (through fetch) and uses the DiskCache as
+// a backing store. This is only used in local dev.
+async function loadBundleFromR2(requirement: string): Promise<Reader> {
   // first check if the disk cache has what we want
-  const filename = LOCKFILE['packages'][requirement]['file_name'];
+  const filename = getFilenameOfPackage(requirement);
   const cached = DiskCache.get(filename);
   if (cached) {
     const decompressed = await decompressArrayBuffer(cached);
-    return [requirement, decompressed];
+    const reader = new ArrayBufferReader(decompressed);
+    return reader;
   }
 
   // we didn't find it in the disk cache, continue with original fetch
@@ -50,7 +67,22 @@ async function loadBundle(requirement: string): Promise<[string, ArrayBuffer]> {
   const decompressed = await decompressArrayBuffer(compressed);
 
   DiskCache.put(filename, compressed);
-  return [requirement, decompressed];
+  const reader = new ArrayBufferReader(decompressed);
+  return reader;
+}
+
+async function loadBundleFromArtifactBundler(
+  requirement: string
+): Promise<Reader> {
+  const filename = getFilenameOfPackage(requirement);
+  const fullPath = 'python-package-bucket/' + PACKAGES_VERSION + '/' + filename;
+  const reader = ArtifactBundler.getPackage(fullPath);
+  if (!reader) {
+    throw new Error(
+      'Failed to get package ' + fullPath + ' from ArtifactBundler'
+    );
+  }
+  return reader;
 }
 
 /**
@@ -73,22 +105,23 @@ class ArrayBufferReader {
   }
 }
 
-export async function loadPackages(Module: Module, requirements: Set<string>) {
-  if (!LOAD_WHEELS_FROM_R2) return;
-
-  let loadPromises = [];
+async function loadPackagesImpl(
+  Module: Module,
+  requirements: Set<string>,
+  loadBundle: (req: string) => Promise<Reader>
+) {
+  let loadPromises: Promise<[string, Reader]>[] = [];
   let loading = [];
   for (const req of requirements) {
     if (SITE_PACKAGES.loadedRequirements.has(req)) continue;
-    loadPromises.push(loadBundle(req));
+    loadPromises.push(loadBundle(req).then((r) => [req, r]));
     loading.push(req);
   }
 
   console.log('Loading ' + loading.join(', '));
 
   const buffers = await Promise.all(loadPromises);
-  for (const [requirement, buffer] of buffers) {
-    const reader = new ArrayBufferReader(buffer);
+  for (const [requirement, reader] of buffers) {
    const [tarInfo, soFiles] = parseTarInfo(reader);
     SITE_PACKAGES.addSmallBundle(tarInfo, soFiles, requirement);
   }
@@ -100,3 +133,11 @@ export async function loadPackages(Module: Module, requirements: Set<string>) {
   const info = SITE_PACKAGES.rootInfo;
   Module.FS.mount(tarFS, { info }, path);
 }
+
+export async function loadPackages(Module: Module, requirements: Set<string>) {
+  if (LOAD_WHEELS_FROM_R2) {
+    await loadPackagesImpl(Module, requirements, loadBundleFromR2);
+  } else if (LOAD_WHEELS_FROM_ARTIFACT_BUNDLER) {
+    await loadPackagesImpl(Module, requirements, loadBundleFromArtifactBundler);
+  }
+}
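The `Reader` interface that both loaders now return is declared elsewhere in the pyodide-internal type definitions and is not part of this diff. Below is a minimal sketch of the shape the code above appears to rely on; the interface body and the `ArrayBufferReaderSketch` class are illustrative assumptions, not code from the PR.

```ts
// Assumed shape; the authoritative declaration lives in the pyodide-internal types.
interface Reader {
  // Copies bytes starting at `offset` into `buf` and returns how many bytes were written.
  read(offset: number, buf: Uint8Array): number;
}

// ArrayBufferReader (whose body is elided in the hunk above) plausibly wraps a
// decompressed wheel bundle along these lines:
class ArrayBufferReaderSketch implements Reader {
  private readonly view: Uint8Array;

  constructor(buffer: ArrayBuffer) {
    this.view = new Uint8Array(buffer);
  }

  read(offset: number, buf: Uint8Array): number {
    const slice = this.view.subarray(offset, offset + buf.length);
    buf.set(slice);
    return slice.length;
  }
}
```

Under this assumption, `parseTarInfo(reader)` can walk a tar bundle regardless of whether the bytes came from a fetch-and-decompress path or from the ArtifactBundler.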
diff --git a/src/pyodide/internal/metadata.ts b/src/pyodide/internal/metadata.ts
index 70b4fadea1d..662748b5b7d 100644
--- a/src/pyodide/internal/metadata.ts
+++ b/src/pyodide/internal/metadata.ts
@@ -1,5 +1,4 @@
 import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';
-export { default as LOCKFILE } from 'pyodide-internal:generated/pyodide-lock.json';
 import { default as PYODIDE_BUCKET } from 'pyodide-internal:generated/pyodide-bucket.json';
 import { default as ArtifactBundler } from 'pyodide-internal:artifacts';
 
@@ -9,6 +8,13 @@ export const SHOULD_SNAPSHOT_TO_DISK = MetadataReader.shouldSnapshotToDisk();
 export const IS_CREATING_BASELINE_SNAPSHOT =
   MetadataReader.isCreatingBaselineSnapshot();
 export const WORKERD_INDEX_URL = PYODIDE_BUCKET.PYODIDE_PACKAGE_BUCKET_URL;
+export const LOAD_WHEELS_FROM_R2: boolean = IS_WORKERD;
+export const LOAD_WHEELS_FROM_ARTIFACT_BUNDLER =
+  MetadataReader.shouldUsePackagesInArtifactBundler();
+export const PACKAGES_VERSION = MetadataReader.getPackagesVersion();
+export const LOCKFILE: PackageLock = JSON.parse(
+  MetadataReader.getPackagesLock()
+);
 export const REQUIREMENTS = MetadataReader.getRequirements();
 export const MAIN_MODULE_NAME = MetadataReader.getMainModule();
 export const MEMORY_SNAPSHOT_READER = MetadataReader.hasMemorySnapshot()
diff --git a/src/pyodide/internal/setupPackages.ts b/src/pyodide/internal/setupPackages.ts
index 9734902cabb..fe386f714a4 100644
--- a/src/pyodide/internal/setupPackages.ts
+++ b/src/pyodide/internal/setupPackages.ts
@@ -1,8 +1,12 @@
 import { parseTarInfo } from 'pyodide-internal:tar';
 import { createTarFS } from 'pyodide-internal:tarfs';
 import { createMetadataFS } from 'pyodide-internal:metadatafs';
-import { default as LOCKFILE } from 'pyodide-internal:generated/pyodide-lock.json';
-import { REQUIREMENTS, WORKERD_INDEX_URL } from 'pyodide-internal:metadata';
+import {
+  REQUIREMENTS,
+  LOAD_WHEELS_FROM_R2,
+  LOCKFILE,
+  LOAD_WHEELS_FROM_ARTIFACT_BUNDLER,
+} from 'pyodide-internal:metadata';
 import { simpleRunPython } from 'pyodide-internal:util';
 
 const canonicalizeNameRegex = /[-_.]+/g;
@@ -122,22 +126,25 @@ class SitePackagesDir {
  * This also returns the list of soFiles in the resulting site-packages
  * directory so we can preload them.
  */
-export function buildSitePackages(
-  requirements: Set<string>
-): [SitePackagesDir, boolean] {
+export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
   const [bigTarInfo, bigTarSoFiles] = parseTarInfo();
 
-  let LOAD_WHEELS_FROM_R2 = true;
   let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]);
-  if (bigTarInfo.children!.size > 10) {
-    LOAD_WHEELS_FROM_R2 = false;
+
+  // Currently, we include all packages within the big bundle in Edgeworker.
+  // During this transitional period, we add the option (via autogate)
+  // to load packages from GCS (in which case they are accessible through the ArtifactBundler)
+  // or to simply use the packages within the big bundle. The latter is not ideal
+  // since we're locked to a specific packages version, so we will want to move away
+  // from it eventually.
+  if (!LOAD_WHEELS_FROM_R2 && !LOAD_WHEELS_FROM_ARTIFACT_BUNDLER) {
     requirements.forEach((r) => requirementsInBigBundle.add(r));
   }
 
   const res = new SitePackagesDir();
   res.addBigBundle(bigTarInfo, bigTarSoFiles, requirementsInBigBundle);
 
-  return [res, LOAD_WHEELS_FROM_R2];
+  return res;
 }
 
 /**
@@ -188,8 +195,8 @@ export function mountLib(Module: Module, info: TarFSInfo): void {
   const site_packages = getSitePackagesPath(Module);
   Module.FS.mkdirTree(site_packages);
   Module.FS.mkdirTree('/session/metadata');
-  if (!LOAD_WHEELS_FROM_R2) {
-    // if we are not loading additional wheels from R2, then we're done
+  if (!LOAD_WHEELS_FROM_R2 && !LOAD_WHEELS_FROM_ARTIFACT_BUNDLER) {
+    // if we are not loading additional wheels, then we're done
     // with site-packages and we can mount it here. Otherwise, we must mount it in
     // loadPackages().
     Module.FS.mount(tarFS, { info }, site_packages);
@@ -253,6 +260,4 @@ function addPackageToLoad(
 
 export { REQUIREMENTS };
 export const TRANSITIVE_REQUIREMENTS = getTransitiveRequirements();
-export const [SITE_PACKAGES, LOAD_WHEELS_FROM_R2] = buildSitePackages(
-  TRANSITIVE_REQUIREMENTS
-);
+export const SITE_PACKAGES = buildSitePackages(TRANSITIVE_REQUIREMENTS);
diff --git a/src/pyodide/types/artifacts.d.ts b/src/pyodide/types/artifacts.d.ts
index 25dbf38d033..8a1957e716c 100644
--- a/src/pyodide/types/artifacts.d.ts
+++ b/src/pyodide/types/artifacts.d.ts
@@ -13,6 +13,7 @@ declare namespace ArtifactBundler {
   const getMemorySnapshotSize: () => number;
   const disposeMemorySnapshot: () => void;
   const storeMemorySnapshot: (snap: MemorySnapshotResult) => void;
+  const getPackage: (path: string) => Reader | null;
 }
 
 export default ArtifactBundler;
diff --git a/src/pyodide/types/pyodide-lock.d.ts b/src/pyodide/types/pyodide-lock.d.ts
index d769a734a41..b58ab26e2f0 100644
--- a/src/pyodide/types/pyodide-lock.d.ts
+++ b/src/pyodide/types/pyodide-lock.d.ts
@@ -16,8 +16,3 @@ interface PackageLock {
     [id: string]: PackageDeclaration;
   };
 }
-
-declare module 'pyodide-internal:generated/pyodide-lock.json' {
-  const lock: PackageLock;
-  export default lock;
-}
diff --git a/src/pyodide/types/runtime-generated/metadata.d.ts b/src/pyodide/types/runtime-generated/metadata.d.ts
index ee0fb110615..1e87c006a0c 100644
--- a/src/pyodide/types/runtime-generated/metadata.d.ts
+++ b/src/pyodide/types/runtime-generated/metadata.d.ts
@@ -14,6 +14,9 @@ declare namespace MetadataReader {
   ) => void;
   const getMemorySnapshotSize: () => number;
   const disposeMemorySnapshot: () => void;
+  const shouldUsePackagesInArtifactBundler: () => boolean;
+  const getPackagesVersion: () => string;
+  const getPackagesLock: () => string;
   const read: (index: number, position: number, buffer: Uint8Array) => number;
 }
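The lockfile string returned by `MetadataReader.getPackagesLock()` is parsed in metadata.ts into the `PackageLock` shape kept in pyodide-lock.d.ts. A rough sketch of the structure this change relies on follows; only `packages` and `file_name` are taken from the diff, the remaining fields are illustrative assumptions.

```ts
// Illustrative sketch; the authoritative types are PackageDeclaration/PackageLock in pyodide-lock.d.ts.
interface PackageDeclarationSketch {
  file_name: string; // resolved by getFilenameOfPackage() in loadPackage.ts
  depends?: string[]; // other fields (name, version, checksums, ...) omitted here
}

interface PackageLockSketch {
  packages: {
    [id: string]: PackageDeclarationSketch;
  };
}

// metadata.ts materializes it once per isolate:
//   export const LOCKFILE: PackageLock = JSON.parse(MetadataReader.getPackagesLock());
// and loadPackage.ts then maps a requirement to its bundle path:
//   'python-package-bucket/' + PACKAGES_VERSION + '/' + LOCKFILE.packages[req].file_name
```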
diff --git a/src/workerd/api/pyodide/pyodide.c++ b/src/workerd/api/pyodide/pyodide.c++
index aa4b606787d..764ffaed223 100644
--- a/src/workerd/api/pyodide/pyodide.c++
+++ b/src/workerd/api/pyodide/pyodide.c++
@@ -11,10 +11,8 @@ namespace workerd::api::pyodide {
 
 const kj::Maybe<jsg::Bundle::Reader> PyodideBundleManager::getPyodideBundle(
     kj::StringPtr version) const {
-  KJ_IF_SOME(t, bundles.lockShared()->find(version)) {
-    return t.bundle;
-  }
-  return kj::none;
+  return bundles.lockShared()->find(version).map(
+      [](const MessageBundlePair& t) { return t.bundle; });
 }
 
 void PyodideBundleManager::setPyodideBundleData(
@@ -53,7 +51,7 @@ static int readToTarget(
 }
 
 int PackagesTarReader::read(jsg::Lock& js, int offset, kj::Array<kj::byte> buf) {
-  return readToTarget(PYODIDE_PACKAGES_TAR.get(), offset, buf);
+  return readToTarget(source, offset, buf);
 }
 
 kj::Array<jsg::JsRef<jsg::JsString>> PyodideMetadataReader::getNames(jsg::Lock& js) {
@@ -159,10 +157,13 @@ jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(
       names.finish(),
       contents.finish(),
       requirements.finish(),
+      kj::str("20240829.4"), // TODO: hardcoded version & lock
+      kj::str(PYODIDE_LOCK.toString()),
       true /* isWorkerd */,
       false /* isTracing */,
      snapshotToDisk,
       createBaselineSnapshot,
+      false, /* usePackagesInArtifactBundler */
       kj::none /* memorySnapshot */
   );
   // clang-format on
diff --git a/src/workerd/api/pyodide/pyodide.h b/src/workerd/api/pyodide/pyodide.h
index 56fc01046ea..fe9abad3d32 100644
--- a/src/workerd/api/pyodide/pyodide.h
+++ b/src/workerd/api/pyodide/pyodide.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "workerd/util/wait-list.h"
 #include
 #include
 #include
@@ -49,8 +50,10 @@ struct PythonConfig {
 // A function to read a segment of the tar file into a buffer
 // Set up this way to avoid copying files that aren't accessed.
 class PackagesTarReader: public jsg::Object {
+  kj::ArrayPtr<const kj::byte> source;
+
 public:
-  PackagesTarReader() = default;
+  PackagesTarReader(kj::ArrayPtr<const kj::byte> src = PYODIDE_PACKAGES_TAR.get()): source(src) {};
 
   int read(jsg::Lock& js, int offset, kj::Array<kj::byte> buf);
@@ -67,10 +70,13 @@ class PyodideMetadataReader: public jsg::Object {
   kj::Array<kj::String> names;
   kj::Array<kj::Array<kj::byte>> contents;
   kj::Array<kj::String> requirements;
+  kj::String packagesVersion;
+  kj::String packagesLock;
   bool isWorkerdFlag;
   bool isTracingFlag;
   bool snapshotToDisk;
   bool createBaselineSnapshot;
+  bool usePackagesInArtifactBundler;
   kj::Maybe<kj::Array<kj::byte>> memorySnapshot;
 
 public:
@@ -78,19 +84,25 @@ class PyodideMetadataReader: public jsg::Object {
       kj::Array<kj::String> names,
       kj::Array<kj::Array<kj::byte>> contents,
       kj::Array<kj::String> requirements,
+      kj::String packagesVersion,
+      kj::String packagesLock,
       bool isWorkerd,
       bool isTracing,
       bool snapshotToDisk,
       bool createBaselineSnapshot,
+      bool usePackagesInArtifactBundler,
       kj::Maybe<kj::Array<kj::byte>> memorySnapshot)
       : mainModule(kj::mv(mainModule)),
         names(kj::mv(names)),
         contents(kj::mv(contents)),
         requirements(kj::mv(requirements)),
+        packagesVersion(kj::mv(packagesVersion)),
+        packagesLock(kj::mv(packagesLock)),
         isWorkerdFlag(isWorkerd),
         isTracingFlag(isTracing),
         snapshotToDisk(snapshotToDisk),
         createBaselineSnapshot(createBaselineSnapshot),
+        usePackagesInArtifactBundler(usePackagesInArtifactBundler),
         memorySnapshot(kj::mv(memorySnapshot)) {}
 
   bool isWorkerd() {
@@ -136,6 +148,18 @@ class PyodideMetadataReader: public jsg::Object {
   }
 
   int readMemorySnapshot(int offset, kj::Array<kj::byte> buf);
 
+  bool shouldUsePackagesInArtifactBundler() {
+    return usePackagesInArtifactBundler;
+  }
+
+  kj::String getPackagesVersion() {
+    return kj::str(packagesVersion);
+  }
+
+  kj::String getPackagesLock() {
+    return kj::str(packagesLock);
+  }
+
   JSG_RESOURCE_TYPE(PyodideMetadataReader) {
     JSG_METHOD(isWorkerd);
     JSG_METHOD(isTracing);
@@ -149,6 +173,9 @@ class PyodideMetadataReader: public jsg::Object {
     JSG_METHOD(readMemorySnapshot);
     JSG_METHOD(disposeMemorySnapshot);
     JSG_METHOD(shouldSnapshotToDisk);
+    JSG_METHOD(shouldUsePackagesInArtifactBundler);
+    JSG_METHOD(getPackagesVersion);
+    JSG_METHOD(getPackagesLock);
     JSG_METHOD(isCreatingBaselineSnapshot);
   }
 
@@ -176,15 +203,22 @@ struct MemorySnapshotResult {
 // CPU architecture-specific artifacts. The logic for loading these is in getArtifacts.
 class ArtifactBundler: public jsg::Object {
 public:
+  kj::Maybe<const PyodidePackageManager&> packageManager;
+  // ^ The ArtifactBundler's lifetime should be contained by the PackageManager's lifetime,
+  // since there is normally one worker set for the whole process (see worker-set.h).
+  // In other words: WorkerSet lifetime == PackageManager lifetime, Worker lifetime ==
+  // ArtifactBundler lifetime, and WorkerSet owns and will outlive Worker, so PackageManager
+  // outlives ArtifactBundler.
   kj::Maybe<MemorySnapshotResult> storedSnapshot;
 
-  ArtifactBundler(kj::Maybe<kj::Array<kj::byte>> existingSnapshot)
-      : storedSnapshot(kj::none),
+  ArtifactBundler(kj::Maybe<const PyodidePackageManager&> packageManager,
+      kj::Maybe<kj::Array<kj::byte>> existingSnapshot)
+      : packageManager(packageManager),
+        storedSnapshot(kj::none),
         existingSnapshot(kj::mv(existingSnapshot)),
         isValidating(false) {};
   ArtifactBundler(bool isValidating = false)
-      : storedSnapshot(kj::none),
+      : packageManager(kj::none),
+        storedSnapshot(kj::none),
         existingSnapshot(kj::none),
         isValidating(isValidating) {};
@@ -215,7 +249,13 @@ class ArtifactBundler: public jsg::Object {
   }
 
   static jsg::Ref<ArtifactBundler> makeDisabledBundler() {
-    return jsg::alloc<ArtifactBundler>();
+    return jsg::alloc<ArtifactBundler>(kj::none, kj::none);
+  }
+
+  // Creates an ArtifactBundler that only grants access to packages, and not a memory snapshot.
+  static jsg::Ref<ArtifactBundler> makePackagesOnlyBundler(
+      kj::Maybe<const PyodidePackageManager&> manager) {
+    return jsg::alloc<ArtifactBundler>(manager, kj::none);
   }
 
   void visitForMemoryInfo(jsg::MemoryTracker& tracker) const {
@@ -229,6 +269,16 @@ class ArtifactBundler: public jsg::Object {
     return false; // TODO(later): Remove this function once we regenerate the bundle.
   }
 
+  kj::Maybe<jsg::Ref<PackagesTarReader>> getPackage(kj::String path) {
+    KJ_IF_SOME(pacman, packageManager) {
+      KJ_IF_SOME(ptr, pacman.getPyodidePackage(path)) {
+        return jsg::alloc<PackagesTarReader>(ptr);
+      }
+    }
+
+    return kj::none;
+  }
+
   JSG_RESOURCE_TYPE(ArtifactBundler) {
     JSG_METHOD(hasMemorySnapshot);
     JSG_METHOD(getMemorySnapshotSize);
@@ -237,6 +287,7 @@ class ArtifactBundler: public jsg::Object {
     JSG_METHOD(isEwValidating);
     JSG_METHOD(storeMemorySnapshot);
     JSG_METHOD(isEnabled);
+    JSG_METHOD(getPackage);
   }
 
 private:
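Taken together, the worker-side changes leave three ways for packages to reach site-packages. The sketch below is a condensed paraphrase of the new `loadPackages()` and `buildSitePackages()` behavior above, written for orientation only; it is not code from the PR.

```ts
// Condensed paraphrase of the dispatch introduced in this diff.
async function loadPackagesSketch(Module: Module, requirements: Set<string>): Promise<void> {
  if (LOAD_WHEELS_FROM_R2) {
    // workerd / local dev: fetch each wheel from WORKERD_INDEX_URL and cache it via DiskCache.
    await loadPackagesImpl(Module, requirements, loadBundleFromR2);
  } else if (LOAD_WHEELS_FROM_ARTIFACT_BUNDLER) {
    // production path behind the autogate: read wheels the runtime staged under
    // 'python-package-bucket/<PACKAGES_VERSION>/<file_name>' via ArtifactBundler.getPackage().
    await loadPackagesImpl(Module, requirements, loadBundleFromArtifactBundler);
  } else {
    // neither flag set: buildSitePackages() already folded every requirement into the big
    // bundle and mountLib() mounted site-packages directly, so there is nothing to load here.
  }
}
```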