From 2f1ca5020fb69bbbb6338a6d6e60f31e44a2d26a Mon Sep 17 00:00:00 2001
From: corwin
Date: Thu, 15 Nov 2018 21:11:29 -0500
Subject: [PATCH] Version 6.2.0.273

Note: This is a pre-release version; future versions of VDO may not support
VDO devices created with this version.

- Fixed more error path memory leaks in the uds and kvdo modules.
- Fixed module loading issues with the spec file on Fedora.
- Removed the read cache.
- Fixed error handling in preresume.
- Converted table line parsing to use existing DM functions.
- Fixed a bug which prevented parsing of version 0 table lines.
- In order to properly handle version 0 table lines, made no-op physical
  growth a success rather than an error.
- Limited the number of logical zones to 60.
- Converted to use the kernel's bio zeroing method instead of a VDO-specific
  one.
- Added a missing call to flush_dcache_page() after writing pages which may
  be owned by the page cache or a user, as required by the kernel.
- Added a version 2 table line which uses DM-style optional parameters.
- Fixed a bug in the statistics that track partial I/Os.
- Added a maximum discard size table line parameter and removed the
  corresponding sysfs parameter, which applied to all VDO devices.
---
 CONTRIBUTORS.txt                  |    3 +-
 kvdo.spec                         |    6 +-
 uds/Makefile                      |    2 +-
 vdo/Makefile                      |    2 +-
 vdo/base/constants.h              |    5 +-
 vdo/base/extent.c                 |    6 +-
 vdo/base/forest.c                 |   21 +-
 vdo/base/statistics.h             |    2 +-
 vdo/base/threadConfig.c           |    9 +-
 vdo/base/vdoResize.c              |    8 +-
 vdo/kernel/bio.c                  |   26 +-
 vdo/kernel/dataKVIO.c             |  172 ++++-
 vdo/kernel/dataKVIO.h             |   46 +-
 vdo/kernel/deviceConfig.c         |  299 ++++--
 vdo/kernel/deviceConfig.h         |   12 +-
 vdo/kernel/dmvdo.c                |   80 +--
 vdo/kernel/dump.c                 |   11 +-
 vdo/kernel/ioSubmitter.c          |   28 +-
 vdo/kernel/ioSubmitter.h          |   23 +-
 vdo/kernel/ioSubmitterInternals.h |    4 +-
 vdo/kernel/kernelLayer.c          |   27 +-
 vdo/kernel/kernelStatistics.h     |   12 -
 vdo/kernel/kernelTypes.h          |    4 +-
 vdo/kernel/kvio.c                 |    5 +-
 vdo/kernel/poolSysfsStats.c       |   54 --
 vdo/kernel/readCache.c            | 1069 -----------------------------
 vdo/kernel/readCache.h            |  118 ----
 vdo/kernel/statusProcfs.c         |    3 +-
 vdo/kernel/sysfs.c                |   19 +-
 vdo/kernel/verify.c               |    5 +-
 30 files changed, 501 insertions(+), 1580 deletions(-)
 delete mode 100644 vdo/kernel/readCache.c
 delete mode 100644 vdo/kernel/readCache.h

diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index eca809d4..159c1020 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -31,7 +31,8 @@ The Red Hat VDO Team:
 
 Other Contributors:
   Ji-Hyeon Gim : Updates for FC26/Kernel 4.13
-
+  Vojtech Trefny
+    Getting correct size of partitions
 
 VDO was originally created at Permabit Technology Corporation, and was
 subsequently acquired and open-sourced by Red Hat.
diff --git a/kvdo.spec b/kvdo.spec
index 046adf14..8452918a 100644
--- a/kvdo.spec
+++ b/kvdo.spec
@@ -1,6 +1,6 @@
 %define spec_release 1
 %define kmod_name kvdo
-%define kmod_driver_version 6.2.0.239
+%define kmod_driver_version 6.2.0.273
 %define kmod_rpm_release %{spec_release}
 %define kmod_kernel_version 3.10.0-693.el7
 
@@ -85,5 +85,5 @@ rm -rf $RPM_BUILD_ROOT
 %{_usr}/src/%{kmod_name}-%{version}-%{kmod_driver_version}/*
 
 %changelog
-* Fri Oct 05 2018 - J. corwin Coburn - 6.2.0.239-1
-HASH(0x1b8ead8)
\ No newline at end of file
+* Thu Nov 15 2018 - J. corwin Coburn - 6.2.0.273-1
+HASH(0x1d75b98)
\ No newline at end of file
diff --git a/uds/Makefile b/uds/Makefile
index 46e8991f..b8829cb5 100644
--- a/uds/Makefile
+++ b/uds/Makefile
@@ -1,4 +1,4 @@
-UDS_VERSION = 6.2.0.72
+UDS_VERSION = 6.2.0.74
 
 SOURCES = $(notdir $(wildcard $(src)/*.c)) murmur/MurmurHash3.c
 SOURCES += $(addprefix util/,$(notdir $(wildcard $(src)/util/*.c)))
diff --git a/vdo/Makefile b/vdo/Makefile
index 9e1cd3e7..58ca3691 100644
--- a/vdo/Makefile
+++ b/vdo/Makefile
@@ -1,4 +1,4 @@
-VDO_VERSION = 6.2.0.239
+VDO_VERSION = 6.2.0.273
 
 VDO_VERSION_MAJOR = $(word 1,$(subst ., ,$(VDO_VERSION)))
 VDO_VERSION_MINOR = $(word 2,$(subst ., ,$(VDO_VERSION)))
diff --git a/vdo/base/constants.h b/vdo/base/constants.h
index 63962810..c7bd7d7a 100644
--- a/vdo/base/constants.h
+++ b/vdo/base/constants.h
@@ -16,7 +16,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  * 02110-1301, USA.
  *
- * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/constants.h#1 $
+ * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/constants.h#2 $
  */
 
 #ifndef CONSTANTS_H
@@ -54,6 +54,9 @@ enum {
    **/
   LOCK_MAP_CAPACITY = 10000,
 
+  /** The maximum number of logical zones */
+  MAX_LOGICAL_ZONES = 60,
+
   /** The maximum number of physical zones */
   MAX_PHYSICAL_ZONES = 16,
 
diff --git a/vdo/base/extent.c b/vdo/base/extent.c
index d26fdfe3..a6fd3aa8 100644
--- a/vdo/base/extent.c
+++ b/vdo/base/extent.c
@@ -16,7 +16,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  * 02110-1301, USA.
  *
- * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/extent.c#2 $
+ * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/extent.c#3 $
  */
 
 #include "extent.h"
@@ -54,6 +54,10 @@ int createExtent(PhysicalLayer *layer,
 
   result = initializeEnqueueableCompletion(&extent->completion,
                                            VDO_EXTENT_COMPLETION, layer);
+  if (result != VDO_SUCCESS) {
+    FREE(extent);
+    return result;
+  }
 
   for (; extent->count < blockCount; extent->count++) {
     result = layer->createMetadataVIO(layer, vioType, priority, extent, data,
diff --git a/vdo/base/forest.c b/vdo/base/forest.c
index d50e2c96..8511a33f 100644
--- a/vdo/base/forest.c
+++ b/vdo/base/forest.c
@@ -16,7 +16,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  * 02110-1301, USA.
  *
- * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/forest.c#4 $
+ * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/forest.c#5 $
  */
 
 #include "forest.h"
@@ -543,8 +543,19 @@ void traverseForest(BlockMap *map,
 BlockCount computeForestSize(BlockCount logicalBlocks, RootCount rootCount)
 {
   Boundary newSizes;
-  BlockCount approximateNonLeaves = computeNewPages(rootCount, 0, NULL,
-                                                    logicalBlocks, &newSizes);
-  return (approximateNonLeaves
-          + computeBlockMapPageCount(logicalBlocks - approximateNonLeaves));
+  BlockCount approximateNonLeaves
+    = computeNewPages(rootCount, 0, NULL, logicalBlocks, &newSizes);
+
+  // Exclude the tree roots since those aren't allocated from slabs,
+  // and also exclude the super-roots, which only exist in memory.
+  approximateNonLeaves
+    -= rootCount * (newSizes.levels[BLOCK_MAP_TREE_HEIGHT - 2]
+                    + newSizes.levels[BLOCK_MAP_TREE_HEIGHT - 1]);
+
+  BlockCount approximateLeaves
+    = computeBlockMapPageCount(logicalBlocks - approximateNonLeaves);
+
+  // This can be a slight over-estimate since the tree will never have to
+  // address these blocks, so it might be a tiny bit smaller.
+ return (approximateNonLeaves + approximateLeaves); } diff --git a/vdo/base/statistics.h b/vdo/base/statistics.h index 5feb8aec..63ed667b 100644 --- a/vdo/base/statistics.h +++ b/vdo/base/statistics.h @@ -24,7 +24,7 @@ #include "types.h" enum { - STATISTICS_VERSION = 29, + STATISTICS_VERSION = 30, }; typedef struct { diff --git a/vdo/base/threadConfig.c b/vdo/base/threadConfig.c index a0cf2eaf..4658d6e1 100644 --- a/vdo/base/threadConfig.c +++ b/vdo/base/threadConfig.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/threadConfig.c#1 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/threadConfig.c#2 $ */ #include "threadConfig.h" @@ -99,6 +99,13 @@ int makeThreadConfig(ZoneCount logicalZoneCount, physicalZoneCount, MAX_PHYSICAL_ZONES); } + if (logicalZoneCount > MAX_LOGICAL_ZONES) { + return logErrorWithStringError(VDO_BAD_CONFIGURATION, + "Logical zone count %u exceeds maximum " + "(%u)", + logicalZoneCount, MAX_LOGICAL_ZONES); + } + ThreadConfig *config; ThreadCount total = logicalZoneCount + physicalZoneCount + hashZoneCount + 2; int result = allocateThreadConfig(logicalZoneCount, physicalZoneCount, diff --git a/vdo/base/vdoResize.c b/vdo/base/vdoResize.c index 87425d93..3e4483b3 100644 --- a/vdo/base/vdoResize.c +++ b/vdo/base/vdoResize.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/vdoResize.c#5 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/vdoResize.c#6 $ */ #include "vdoResize.h" @@ -216,6 +216,12 @@ static void growPhysicalCallback(VDOCompletion *completion) int performGrowPhysical(VDO *vdo, BlockCount newPhysicalBlocks) { BlockCount oldPhysicalBlocks = vdo->config.physicalBlocks; + + // Skip any noop grows. + if (oldPhysicalBlocks == newPhysicalBlocks) { + return VDO_SUCCESS; + } + if (newPhysicalBlocks != getNextVDOLayoutSize(vdo->layout)) { /* * Either the VDO isn't prepared to grow, or it was prepared to grow diff --git a/vdo/kernel/bio.c b/vdo/kernel/bio.c index 24b732df..d583ce6f 100644 --- a/vdo/kernel/bio.c +++ b/vdo/kernel/bio.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/bio.c#3 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/bio.c#5 $ */ #include "bio.h" @@ -29,7 +29,7 @@ #include "recoveryJournal.h" #include "bioIterator.h" -#include "readCache.h" +#include "ioSubmitter.h" /** * Gets the raw buffer from a biovec. @@ -63,6 +63,7 @@ void bioCopyDataOut(BIO *bio, char *dataPtr) (biovec = getNextBiovec(&iter)) != NULL; advanceBioIterator(&iter)) { memcpy(getBufferForBiovec(biovec), dataPtr, biovec->bv_len); + flush_dcache_page(biovec->bv_page); dataPtr += biovec->bv_len; } } @@ -199,26 +200,7 @@ bool bioIsZeroData(BIO *bio) /**********************************************************************/ void bioZeroData(BIO *bio) { - /* - * There's a routine zero_fill_bio exported from the kernel, but - * this is a little faster. - * - * Taking apart what zero_fill_bio does: The HIGHMEM stuff isn't an - * issue for x86_64, so bvec_k{,un}map_irq does no more than we do - * here. On x86 flush_dcache_page doesn't do anything. And the - * memset call there seems to be expanded inline by the compiler as - * a "rep stosb" loop which is slower than the kernel-exported - * memset. 
- * - * So we're functionally the same, and a little bit faster, this - * way. - */ - struct bio_vec *biovec; - for (BioIterator iter = createBioIterator(bio); - (biovec = getNextBiovec(&iter)) != NULL; - advanceBioIterator(&iter)) { - memset(getBufferForBiovec(biovec), 0, biovec->bv_len); - } + zero_fill_bio(bio); } /**********************************************************************/ diff --git a/vdo/kernel/dataKVIO.c b/vdo/kernel/dataKVIO.c index d219317c..358aae9a 100644 --- a/vdo/kernel/dataKVIO.c +++ b/vdo/kernel/dataKVIO.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dataKVIO.c#10 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dataKVIO.c#13 $ */ #include "dataKVIO.h" @@ -27,6 +27,7 @@ #include "murmur/MurmurHash3.h" #include "dataVIO.h" +#include "compressedBlock.h" #include "hashLock.h" #include "lz4.h" @@ -34,7 +35,7 @@ #include "dedupeIndex.h" #include "kvdoFlush.h" #include "kvio.h" -#include "readCache.h" +#include "ioSubmitter.h" #include "vdoCommon.h" #include "verify.h" @@ -146,7 +147,7 @@ static void kvdoAcknowledgeDataKVIO(DataKVIO *dataKVIO) #endif countBios(&layer->biosAcknowledged, bio); - if (getBioSize(bio) < VDO_BLOCK_SIZE) { + if (dataKVIO->isPartial) { countBios(&layer->biosAcknowledgedPartial, bio); } @@ -164,7 +165,6 @@ static noinline void cleanDataKVIO(DataKVIO *dataKVIO, FreeBufferPointers *fbp) KVIO *kvio = dataKVIOAsKVIO(dataKVIO); kvio->bio = NULL; - runReadCacheReleaseBlock(kvio->layer, &dataKVIO->readBlock); if (unlikely(kvio->vio->trace != NULL)) { maybeLogDataKVIOTrace(dataKVIO); kvioCompletionTap1(dataKVIO); @@ -306,6 +306,123 @@ static void resetUserBio(BIO *bio, int error) #endif } +/** + * Uncompress the data that's just been read and then call back the requesting + * DataKVIO. + * + * @param workItem The DataKVIO requesting the data + **/ +static void uncompressReadBlock(KvdoWorkItem *workItem) +{ + DataKVIO *dataKVIO = workItemAsDataKVIO(workItem); + ReadBlock *readBlock = &dataKVIO->readBlock; + BlockSize blockSize = VDO_BLOCK_SIZE; + + // The DataKVIO's scratch block will be used to contain the + // uncompressed data. + uint16_t fragmentOffset, fragmentSize; + char *compressedData = readBlock->data; + int result = getCompressedBlockFragment(readBlock->mappingState, + compressedData, blockSize, + &fragmentOffset, + &fragmentSize); + if (result != VDO_SUCCESS) { + logDebug("%s: frag err %d", __func__, result); + readBlock->status = result; + readBlock->callback(dataKVIO); + return; + } + + char *fragment = compressedData + fragmentOffset; + int size = LZ4_uncompress_unknownOutputSize(fragment, dataKVIO->scratchBlock, + fragmentSize, blockSize); + if (size == blockSize) { + readBlock->data = dataKVIO->scratchBlock; + } else { + logDebug("%s: lz4 error", __func__); + readBlock->status = VDO_INVALID_FRAGMENT; + } + + readBlock->callback(dataKVIO); +} + +/** + * Now that we have gotten the data from storage, uncompress the data if + * necessary and then call back the requesting DataKVIO. 
+ * + * @param dataKVIO The DataKVIO requesting the data + * @param result The result of the read operation + **/ +static void completeRead(DataKVIO *dataKVIO, int result) +{ + ReadBlock *readBlock = &dataKVIO->readBlock; + readBlock->status = result; + + if ((result == VDO_SUCCESS) && isCompressed(readBlock->mappingState)) { + launchDataKVIOOnCPUQueue(dataKVIO, uncompressReadBlock, NULL, + CPU_Q_ACTION_COMPRESS_BLOCK); + return; + } + + readBlock->callback(dataKVIO); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) +/** + * Callback for a bio doing a read. + * + * @param bio The bio + */ +static void readBioCallback(BIO *bio) +#else +/** + * Callback for a bio doing a read. + * + * @param bio The bio + * @param result The result of the read operation + */ +static void readBioCallback(BIO *bio, int result) +#endif +{ + KVIO *kvio = (KVIO *) bio->bi_private; + DataKVIO *dataKVIO = kvioAsDataKVIO(kvio); + dataKVIO->readBlock.data = dataKVIO->readBlock.buffer; + dataKVIOAddTraceRecord(dataKVIO, THIS_LOCATION(NULL)); + countCompletedBios(bio); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) + completeRead(dataKVIO, getBioResult(bio)); +#else + completeRead(dataKVIO, result); +#endif +} + +/**********************************************************************/ +void kvdoReadBlock(DataVIO *dataVIO, + PhysicalBlockNumber location, + BlockMappingState mappingState, + BioQAction action, + DataKVIOCallback callback) +{ + dataVIOAddTraceRecord(dataVIO, THIS_LOCATION(NULL)); + + DataKVIO *dataKVIO = dataVIOAsDataKVIO(dataVIO); + ReadBlock *readBlock = &dataKVIO->readBlock; + KernelLayer *layer = getLayerFromDataKVIO(dataKVIO); + + readBlock->callback = callback; + readBlock->status = VDO_SUCCESS; + readBlock->mappingState = mappingState; + + BUG_ON(getBIOFromDataKVIO(dataKVIO)->bi_private != &dataKVIO->kvio); + // Read the data directly from the device using the read bio. + BIO *bio = readBlock->bio; + resetBio(bio, layer); + setBioSector(bio, blockToSector(layer, location)); + setBioOperationRead(bio); + bio->bi_end_io = readBioCallback; + submitBio(bio, action); +} + /**********************************************************************/ void kvdoReadDataVIO(DataVIO *dataVIO) { @@ -315,7 +432,7 @@ void kvdoReadDataVIO(DataVIO *dataVIO) if (isCompressed(dataVIO->mapped.state)) { kvdoReadBlock(dataVIO, dataVIO->mapped.pbn, dataVIO->mapped.state, - READ_COMPRESSED_DATA, readDataKVIOReadBlockCallback); + BIO_Q_ACTION_COMPRESSED_DATA, readDataKVIOReadBlockCallback); return; } @@ -374,7 +491,7 @@ void kvdoWriteDataVIO(DataVIO *dataVIO) BIO *bio = kvio->bio; setBioOperationWrite(bio); setBioSector(bio, blockToSector(kvio->layer, dataVIO->newMapped.pbn)); - invalidateCacheAndSubmitBio(kvio, BIO_Q_ACTION_DATA); + submitBio(bio, BIO_Q_ACTION_DATA); } /**********************************************************************/ @@ -557,8 +674,9 @@ static int kvdoCreateKVIOFromBio(KernelLayer *layer, dataKVIO->isPartial = ((getBioSize(bio) < VDO_BLOCK_SIZE) || (dataKVIO->offset != 0)); - DataVIO *dataVIO = &dataKVIO->dataVIO; - if (!dataKVIO->isPartial) { + if (dataKVIO->isPartial) { + countBios(&layer->biosInPartial, bio); + } else { /* * Note that we unconditionally fill in the dataBlock array for * non-read operations. 
There are places like kvdoCopyVIO that may @@ -575,7 +693,7 @@ static int kvdoCreateKVIOFromBio(KernelLayer *layer, */ memset(dataKVIO->dataBlock, 0, VDO_BLOCK_SIZE); } else if (bio_data_dir(bio) == WRITE) { - dataVIO->isZeroBlock = bioIsZeroData(bio); + dataKVIO->dataVIO.isZeroBlock = bioIsZeroData(bio); // Copy the bio data to a char array so that we can continue to use // the data after we acknowledge the bio. bioCopyDataIn(bio, dataKVIO->dataBlock); @@ -684,10 +802,6 @@ int kvdoLaunchDataKVIOFromBio(KernelLayer *layer, uint64_t arrivalTime, bool hasDiscardPermit) { - if (getBioSize(bio) < VDO_BLOCK_SIZE) { - countBios(&layer->biosInPartial, bio); - } - DataKVIO *dataKVIO = NULL; int result = kvdoCreateKVIOFromBio(layer, bio, arrivalTime, &dataKVIO); @@ -867,26 +981,24 @@ static int allocatePooledDataKVIO(KernelLayer *layer, DataKVIO **dataKVIOPtr) "DataKVIO data bio allocation failure"); } - if (!layer->deviceConfig->readCacheEnabled) { - result = allocateMemory(VDO_BLOCK_SIZE, 0, "kvio read buffer", - &dataKVIO->readBlock.buffer); - if (result != VDO_SUCCESS) { - freePooledDataKVIO(layer, dataKVIO); - return logErrorWithStringError(result, - "DataKVIO read allocation failure"); - } - - result = createBio(layer, dataKVIO->readBlock.buffer, - &dataKVIO->readBlock.bio); - if (result != VDO_SUCCESS) { - freePooledDataKVIO(layer, dataKVIO); - return logErrorWithStringError(result, - "DataKVIO read bio allocation failure"); - } + result = allocateMemory(VDO_BLOCK_SIZE, 0, "kvio read buffer", + &dataKVIO->readBlock.buffer); + if (result != VDO_SUCCESS) { + freePooledDataKVIO(layer, dataKVIO); + return logErrorWithStringError(result, + "DataKVIO read allocation failure"); + } - dataKVIO->readBlock.bio->bi_private = &dataKVIO->kvio; + result = createBio(layer, dataKVIO->readBlock.buffer, + &dataKVIO->readBlock.bio); + if (result != VDO_SUCCESS) { + freePooledDataKVIO(layer, dataKVIO); + return logErrorWithStringError(result, + "DataKVIO read bio allocation failure"); } + dataKVIO->readBlock.bio->bi_private = &dataKVIO->kvio; + result = allocateMemory(VDO_BLOCK_SIZE, 0, "kvio scratch", &dataKVIO->scratchBlock); if (result != VDO_SUCCESS) { diff --git a/vdo/kernel/dataKVIO.h b/vdo/kernel/dataKVIO.h index 48df11f5..6a4e19cf 100644 --- a/vdo/kernel/dataKVIO.h +++ b/vdo/kernel/dataKVIO.h @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dataKVIO.h#2 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dataKVIO.h#4 $ */ #ifndef DATA_KVIO_H @@ -54,54 +54,32 @@ struct dedupeContext { const UdsChunkName *chunkName; }; -/* Read cache support */ typedef struct { /** * A pointer to a block that holds the data from the last read operation. - * - * Note that this handle is counted as a reference on the read cache entry - * associated with this block pointer, if the read cache exists. **/ char *data; /** - * Temporary storage for doing reads from the underlying device when the - * read cache does not exist. If the read cache exists, this will be NULL. + * Temporary storage for doing reads from the underlying device. **/ char *buffer; /** - * A bio structure wrapping the buffer, if the read cache does not exist. - * If the read cache exists, this will be NULL. + * A bio structure wrapping the buffer. **/ BIO *bio; /** * Callback to invoke after completing the read I/O operation. **/ DataKVIOCallback callback; - /** - * Physical block number passed to kvdoReadBlock(). 
Used only if the read - * cache exists. - **/ - PhysicalBlockNumber pbn; /** * Mapping state passed to kvdoReadBlock(), used to determine whether * the data must be uncompressed. **/ BlockMappingState mappingState; - /** - * The action code (BIO_Q_ACTION_* value) for the I/O work item, used if the - * read request needs to read data from the underlying storage. - * - * Derived from the ReadBlockOperation passed to kvdoReadBlock(). - **/ - BioQAction action; /** * The result code of the read attempt. **/ int status; - /** - * The cache entry. If the read cache does not exist, this is always NULL. - **/ - ReadCacheEntry *cacheEntry; } ReadBlock; struct dataKVIO { @@ -378,6 +356,24 @@ void kvdoZeroDataVIO(DataVIO *dataVIO); **/ void kvdoCopyDataVIO(DataVIO *source, DataVIO *destination); +/** + * Fetch the data for a block from storage. The fetched data will be + * uncompressed when the callback is called, and the result of the read + * operation will be stored in the ReadBlock's status field. On success, + * the data will be in the ReadBlock's data pointer. + * + * @param dataVIO The DataVIO to read a block in for + * @param location The physical block number to read from + * @param mappingState The mapping state of the block to read + * @param action The bio queue action + * @param callback The function to call when the read is done + **/ +void kvdoReadBlock(DataVIO *dataVIO, + PhysicalBlockNumber location, + BlockMappingState mappingState, + BioQAction action, + DataKVIOCallback callback); + /** * Implements DataReader. * diff --git a/vdo/kernel/deviceConfig.c b/vdo/kernel/deviceConfig.c index cdbf94bf..e2ea341f 100644 --- a/vdo/kernel/deviceConfig.c +++ b/vdo/kernel/deviceConfig.c @@ -1,4 +1,4 @@ -/* +/** * Copyright (c) 2018 Red Hat, Inc. * * This program is free software; you can redistribute it and/or @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/deviceConfig.c#4 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/deviceConfig.c#9 $ */ #include "deviceConfig.h" @@ -31,12 +31,9 @@ #include "constants.h" -// The index of the pool name within the table line for the two known versions -static const uint8_t POOL_NAME_ARG_INDEX[2] = {8, 10}; - enum { - V0_REQUIRED_ARGC = 10, - V1_REQUIRED_ARGC = 12, + // If we bump this, update the arrays below + TABLE_VERSION = 2, // Arbitrary limit used when parsing thread-count config spec strings THREAD_COUNT_LIMIT = 100, BIO_ROTATION_INTERVAL_LIMIT = 1024, @@ -50,7 +47,9 @@ enum { DEFAULT_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL = 64, }; -static const char TABLE_VERSION_STRING[] = "V1"; +// arrays for handling different table versions +static const uint8_t REQUIRED_ARGC[] = {10, 12, 9}; +static const uint8_t POOL_NAME_ARG_INDEX[] = {8, 10, 8}; /** * Decide the version number from argv. 
@@ -67,24 +66,36 @@ static int getVersionNumber(int argc, char **errorPtr, TableVersion *versionPtr) { - if (strcmp(argv[0], TABLE_VERSION_STRING) != 0) { - if (argc == V0_REQUIRED_ARGC) { - logWarning("Detected version mismatch between kernel module and tools."); - logWarning("Please consider upgrading management tools to match kernel."); - *versionPtr = 0; - return VDO_SUCCESS; + // version, if it exists, is in a form of V + if (sscanf(argv[0], "V%u", versionPtr) == 1) { + if (*versionPtr < 1 || *versionPtr > TABLE_VERSION) { + *errorPtr = "Unknown version number detected"; + return VDO_BAD_CONFIGURATION; } - *errorPtr = "Incorrect number of arguments for any known format"; - return VDO_BAD_CONFIGURATION; + } else { + // V0 actually has no version number in the table string + *versionPtr = 0; } - if (argc == V1_REQUIRED_ARGC) { - *versionPtr = 1; - return VDO_SUCCESS; + // V0 and V1 have no optional parameters. There will always be + // a parameter for thread config, even if its a "." to show + // its an empty list. + if (*versionPtr <= 1) { + if (argc != REQUIRED_ARGC[*versionPtr]) { + *errorPtr = "Incorrect number of arguments for version"; + return VDO_BAD_CONFIGURATION; + } + } else if (argc < REQUIRED_ARGC[*versionPtr]) { + *errorPtr = "Incorrect number of arguments for version"; + return VDO_BAD_CONFIGURATION; } - *errorPtr = "Incorrect number of arguments"; - return VDO_BAD_CONFIGURATION; + if (*versionPtr != TABLE_VERSION) { + logWarning("Detected version mismatch between kernel module and tools " + " kernel: %d, tool: %d", TABLE_VERSION, *versionPtr); + logWarning("Please consider upgrading management tools to match kernel."); + } + return VDO_SUCCESS; } /**********************************************************************/ @@ -186,7 +197,8 @@ static inline int parseBool(const char *boolStr, * update the configuration data structure. * * If the thread count requested is invalid, a message is logged and - * -EINVAL returned. + * -EINVAL returned. If the thread name is unknown, a message is logged + * but no error is returned. * * @param threadParamType The type of thread specified * @param count The thread count requested @@ -252,9 +264,10 @@ static int processOneThreadConfigSpec(const char *threadParamType, } } - // More will be added eventually. - logError("unknown thread parameter type \"%s\"", threadParamType); - return -EINVAL; + // Don't fail, just log. This will handle version mismatches between + // user mode tools and kernel. + logInfo("unknown thread parameter type \"%s\"", threadParamType); + return VDO_SUCCESS; } /** @@ -302,13 +315,13 @@ static int parseOneThreadConfigSpec(const char *spec, * of the form "typename=number"; the supported type names are "cpu", "ack", * "bio", "bioRotationInterval", "logical", "physical", and "hash". * - * An incorrect format or unknown thread parameter type name is an error. + * If an error occurs during parsing of a single key/value pair, we deem + * it serious enough to stop further parsing. * * This function can't set the "reason" value the caller wants to pass * back, because we'd want to format it to say which field was * invalid, and we can't allocate the "reason" strings dynamically. So - * if an error occurs, we'll just log the details, and pass back - * EINVAL. + * if an error occurs, we'll log the details and pass back an error. 
* * @param string Thread parameter configuration string * @param config The thread configuration data to update @@ -318,18 +331,166 @@ static int parseOneThreadConfigSpec(const char *spec, static int parseThreadConfigString(const char *string, ThreadCountConfig *config) { + int result = VDO_SUCCESS; + char **specs; - int result = splitString(string, ',', &specs); + if (strcmp(".", string) != 0) { + result = splitString(string, ',', &specs); + if (result != UDS_SUCCESS) { + return result; + } + for (unsigned int i = 0; specs[i] != NULL; i++) { + result = parseOneThreadConfigSpec(specs[i], config); + if (result != VDO_SUCCESS) { + break; + } + } + freeStringArray(specs); + } + return result; +} + +/** + * Process one component of an optional parameter string and + * update the configuration data structure. + * + * If the value requested is invalid, a message is logged and + * -EINVAL returned. If the key is unknown, a message is logged + * but no error is returned. + * + * @param key The optional parameter key name + * @param value The optional parameter value + * @param config The configuration data structure to update + * + * @return VDO_SUCCESS or -EINVAL + **/ +static int processOneKeyValuePair(const char *key, + unsigned int value, + DeviceConfig *config) +{ + // Non thread optional parameters + if (strcmp(key, "maxDiscard") == 0) { + if (value == 0) { + logError("optional parameter error:" + " at least one max discard block required"); + return -EINVAL; + } + // Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 + if (value > (UINT_MAX / VDO_BLOCK_SIZE)) { + logError("optional parameter error: at most %d max discard" + " blocks are allowed", UINT_MAX / VDO_BLOCK_SIZE); + return -EINVAL; + } + config->maxDiscardBlocks = value; + return VDO_SUCCESS; + } + // Handles unknown key names + return processOneThreadConfigSpec(key, value, &config->threadCounts); +} + +/** + * Parse one key/value pair and update the configuration + * data structure. + * + * @param key The optional key name + * @param value The optional value + * @param config The configuration data to be updated + * + * @return VDO_SUCCESS or error + **/ +static int parseOneKeyValuePair(const char *key, + const char *value, + DeviceConfig *config) +{ + unsigned int count; + int result = stringToUInt(value, &count); if (result != UDS_SUCCESS) { + logError("optional config string error: integer value needed, found \"%s\"", + value); return result; } - for (unsigned int i = 0; specs[i] != NULL; i++) { - result = parseOneThreadConfigSpec(specs[i], config); + return processOneKeyValuePair(key, count, config); +} + +/** + * Parse all key/value pairs from a list of arguments. + * + * If an error occurs during parsing of a single key/value pair, we deem + * it serious enough to stop further parsing. + * + * This function can't set the "reason" value the caller wants to pass + * back, because we'd want to format it to say which field was + * invalid, and we can't allocate the "reason" strings dynamically. So + * if an error occurs, we'll log the details and return the error. 
+ * + * @param argc The total number of arguments in list + * @param argv The list of key/value pairs + * @param config The device configuration data to update + * + * @return VDO_SUCCESS or error + **/ +static int parseKeyValuePairs(int argc, + char **argv, + DeviceConfig *config) +{ + int result = VDO_SUCCESS; + while (argc) { + result = parseOneKeyValuePair(argv[0], argv[1], config); if (result != VDO_SUCCESS) { break; } + + argc -= 2; + argv += 2; + } + + return result; +} + +/** + * Parse the configuration string passed in for optional arguments. + * + * For V0/V1 configurations, there will only be one optional parameter; + * the thread configuration. The configuration string should contain + * one or more comma-separated specs of the form "typename=number"; the + * supported type names are "cpu", "ack", "bio", "bioRotationInterval", + * "logical", "physical", and "hash". + * + * For V2 configurations and beyond, there could be any number of + * arguments. They should contain one or more key/value pairs + * separated by a space. + * + * @param argSet The structure holding the arguments to parse + * @param errorPtr Pointer to a buffer to hold the error string + * @param config Pointer to device configuration data to update + * + * @return VDO_SUCCESS or error + */ +int parseOptionalArguments(struct dm_arg_set *argSet, + char **errorPtr, + DeviceConfig *config) +{ + int result = VDO_SUCCESS; + + if (config->version == 0 || config->version == 1) { + result = parseThreadConfigString(argSet->argv[0], + &config->threadCounts); + if (result != VDO_SUCCESS) { + *errorPtr = "Invalid thread-count configuration"; + return VDO_BAD_CONFIGURATION; + } + } else { + if ((argSet->argc % 2) != 0) { + *errorPtr = "Odd number of optional arguments given but they" + " should be pairs"; + return VDO_BAD_CONFIGURATION; + } + result = parseKeyValuePairs(argSet->argc, argSet->argv, config); + if (result != VDO_SUCCESS) { + *errorPtr = "Invalid optional argument configuration"; + return VDO_BAD_CONFIGURATION; + } } - freeStringArray(specs); return result; } @@ -390,8 +551,12 @@ int parseDeviceConfig(int argc, .physicalZones = 0, .hashZones = 0, }; + config->maxDiscardBlocks = 1; - char **argumentPtr = argv; + struct dm_arg_set argSet; + + argSet.argc = argc; + argSet.argv = argv; result = getVersionNumber(argc, argv, errorPtr, &config->version); if (result != VDO_SUCCESS) { @@ -399,9 +564,12 @@ int parseDeviceConfig(int argc, handleParseError(&config, errorPtr, *errorPtr); return result; } - argumentPtr++; + // Move the arg pointer forward only if the argument was there. + if (config->version >= 1) { + dm_shift_arg(&argSet); + } - result = duplicateString(*argumentPtr++, "parent device name", + result = duplicateString(dm_shift_arg(&argSet), "parent device name", &config->parentDeviceName); if (result != VDO_SUCCESS) { handleParseError(&config, errorPtr, "Could not copy parent device name"); @@ -409,8 +577,8 @@ int parseDeviceConfig(int argc, } // Get the physical blocks, if known. 
- if (config->version == 1) { - result = kstrtoull(*argumentPtr++, 10, &config->physicalBlocks); + if (config->version >= 1) { + result = kstrtoull(dm_shift_arg(&argSet), 10, &config->physicalBlocks); if (result != VDO_SUCCESS) { handleParseError(&config, errorPtr, "Invalid physical block count"); return VDO_BAD_CONFIGURATION; @@ -419,45 +587,34 @@ int parseDeviceConfig(int argc, // Get the logical block size and validate bool enable512e; - result = parseBool(*argumentPtr++, "512", "4096", &enable512e); + result = parseBool(dm_shift_arg(&argSet), "512", "4096", &enable512e); if (result != VDO_SUCCESS) { handleParseError(&config, errorPtr, "Invalid logical block size"); return VDO_BAD_CONFIGURATION; } config->logicalBlockSize = (enable512e ? 512 : 4096); - // Determine whether the read cache is enabled. - result = parseBool(*argumentPtr++, "enabled", "disabled", - &config->readCacheEnabled); - if (result != VDO_SUCCESS) { - handleParseError(&config, errorPtr, "Invalid read cache mode"); - return VDO_BAD_CONFIGURATION; - } - - // Get the number of extra blocks for the read cache. - result = stringToUInt(*argumentPtr++, &config->readCacheExtraBlocks); - if (result != VDO_SUCCESS) { - handleParseError(&config, errorPtr, - "Invalid read cache extra block count"); - return VDO_BAD_CONFIGURATION; + // Skip past the two no longer used read cache options. + if (config->version <= 1) { + dm_consume_args(&argSet, 2); } // Get the page cache size. - result = stringToUInt(*argumentPtr++, &config->cacheSize); + result = stringToUInt(dm_shift_arg(&argSet), &config->cacheSize); if (result != VDO_SUCCESS) { handleParseError(&config, errorPtr, "Invalid block map page cache size"); return VDO_BAD_CONFIGURATION; } // Get the block map era length. - result = stringToUInt(*argumentPtr++, &config->blockMapMaximumAge); + result = stringToUInt(dm_shift_arg(&argSet), &config->blockMapMaximumAge); if (result != VDO_SUCCESS) { handleParseError(&config, errorPtr, "Invalid block map maximum age"); return VDO_BAD_CONFIGURATION; } // Get the MD RAID5 optimization mode and validate - result = parseBool(*argumentPtr++, "on", "off", + result = parseBool(dm_shift_arg(&argSet), "on", "off", &config->mdRaid5ModeEnabled); if (result != VDO_SUCCESS) { handleParseError(&config, errorPtr, "Invalid MD RAID5 mode"); @@ -465,47 +622,40 @@ int parseDeviceConfig(int argc, } // Get the write policy and validate. - if (strcmp(*argumentPtr, "async") == 0) { + if (strcmp(argSet.argv[0], "async") == 0) { config->writePolicy = WRITE_POLICY_ASYNC; - } else if (strcmp(*argumentPtr, "sync") == 0) { + } else if (strcmp(argSet.argv[0], "sync") == 0) { config->writePolicy = WRITE_POLICY_SYNC; - } else if (strcmp(*argumentPtr, "auto") == 0) { + } else if (strcmp(argSet.argv[0], "auto") == 0) { config->writePolicy = WRITE_POLICY_AUTO; } else { handleParseError(&config, errorPtr, "Invalid write policy"); return VDO_BAD_CONFIGURATION; } - argumentPtr++; + dm_shift_arg(&argSet); // Make sure the enum to get the pool name from argv directly is still in // sync with the parsing of the table line. - if (argumentPtr != &argv[POOL_NAME_ARG_INDEX[config->version]]) { + if (&argSet.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) { handleParseError(&config, errorPtr, "Pool name not in expected location"); return VDO_BAD_CONFIGURATION; } // Get the address where the albserver is running. 
Check for validation // is done in dedupe.c code during startKernelLayer call - result = duplicateString(*argumentPtr++, "pool name", &config->poolName); + result = duplicateString(dm_shift_arg(&argSet), "pool name", + &config->poolName); if (result != VDO_SUCCESS) { handleParseError(&config, errorPtr, "Could not copy pool name"); return VDO_BAD_CONFIGURATION; } - result = duplicateString(*argumentPtr++, "thread config", - &config->threadConfigString); + // Get the optional arguments and validate. + result = parseOptionalArguments(&argSet, errorPtr, config); if (result != VDO_SUCCESS) { - handleParseError(&config, errorPtr, "Could not copy thread config"); - return VDO_BAD_CONFIGURATION; - } - - if (strcmp(".", config->threadConfigString) != 0) { - result = parseThreadConfigString(config->threadConfigString, - &config->threadCounts); - if (result != VDO_SUCCESS) { - handleParseError(&config, errorPtr, "Invalid thread-count configuration"); - return VDO_BAD_CONFIGURATION; - } + // parseOptionalArguments sets errorPtr itself. + handleParseError(&config, errorPtr, *errorPtr); + return result; } /* @@ -556,7 +706,6 @@ void freeDeviceConfig(DeviceConfig **configPtr) dm_put_device(config->owningTarget, config->ownedDevice); } - FREE(config->threadConfigString); FREE(config->poolName); FREE(config->parentDeviceName); FREE(config->originalString); diff --git a/vdo/kernel/deviceConfig.h b/vdo/kernel/deviceConfig.h index f19026cf..b82691e0 100644 --- a/vdo/kernel/deviceConfig.h +++ b/vdo/kernel/deviceConfig.h @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/deviceConfig.h#5 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/deviceConfig.h#9 $ */ #ifndef DEVICE_CONFIG_H #define DEVICE_CONFIG_H @@ -25,8 +25,10 @@ #include "kernelTypes.h" +// This structure is memcmp'd for equality. Keep it +// packed and don't add any fields that are not +// properly set in both extant and parsed configs. typedef struct { - int baseThreads; int bioAckThreads; int bioThreads; int bioRotationInterval; @@ -34,7 +36,7 @@ typedef struct { int logicalZones; int physicalZones; int hashZones; -} ThreadCountConfig; +} __attribute__((packed)) ThreadCountConfig; typedef uint32_t TableVersion; @@ -51,11 +53,9 @@ typedef struct { unsigned int cacheSize; unsigned int blockMapMaximumAge; bool mdRaid5ModeEnabled; - bool readCacheEnabled; - unsigned int readCacheExtraBlocks; char *poolName; - char *threadConfigString; ThreadCountConfig threadCounts; + BlockCount maxDiscardBlocks; } DeviceConfig; /** diff --git a/vdo/kernel/dmvdo.c b/vdo/kernel/dmvdo.c index 6be2ed10..36844a9b 100644 --- a/vdo/kernel/dmvdo.c +++ b/vdo/kernel/dmvdo.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dmvdo.c#20 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dmvdo.c#23 $ */ #include "dmvdo.h" @@ -34,10 +34,10 @@ #include "deviceRegistry.h" #include "dump.h" #include "instanceNumber.h" +#include "ioSubmitter.h" #include "kernelLayer.h" #include "kvdoFlush.h" #include "memoryUsage.h" -#include "readCache.h" #include "statusProcfs.h" #include "stringUtils.h" #include "sysfs.h" @@ -46,24 +46,6 @@ struct kvdoDevice kvdoDevice; // global driver state (poorly named) -/* - * Set the default maximum number of sectors for a single discard bio. - * - * The value 1024 is the largest usable value on HD systems. 
A 2048 sector - * discard on a busy HD system takes 31 seconds. We should use a value no - * higher than 1024, which takes 15 to 16 seconds on a busy HD system. - * - * But using large values results in 120 second blocked task warnings in - * /var/log/kern.log. In order to avoid these warnings, we choose to use the - * smallest reasonable value. See VDO-3062 and VDO-3087. - * - * We allow setting of the value for max_discard_sectors even in situations - * where we only split on 4k (see comments for HAS_NO_BLKDEV_SPLIT) as the - * value is still used in other code, like sysfs display of queue limits and - * most especially in dm-thin to determine whether to pass down discards. - */ -unsigned int maxDiscardSectors = VDO_SECTORS_PER_BLOCK; - /* * We want to support discard requests, but early device mapper versions * did not give us any help. Define HAS_DISCARDS_SUPPORTED if the @@ -180,8 +162,26 @@ static void vdoIoHints(struct dm_target *ti, struct queue_limits *limits) // The optimal io size for streamed/sequential io blk_limits_io_opt(limits, VDO_BLOCK_SIZE); - // Discard hints - limits->max_discard_sectors = maxDiscardSectors; + /* + * Sets the maximum discard size that will be passed into VDO. This value + * comes from a table line value passed in during dmsetup create. + * + * The value 1024 is the largest usable value on HD systems. A 2048 sector + * discard on a busy HD system takes 31 seconds. We should use a value no + * higher than 1024, which takes 15 to 16 seconds on a busy HD system. + * + * But using large values results in 120 second blocked task warnings in + * /var/log/kern.log. In order to avoid these warnings, we choose to use the + * smallest reasonable value. See VDO-3062 and VDO-3087. + * + * We allow setting of the value for max_discard_sectors even in situations + * where we only split on 4k (see comments for HAS_NO_BLKDEV_SPLIT) as the + * value is still used in other code, like sysfs display of queue limits and + * most especially in dm-thin to determine whether to pass down discards. + */ + limits->max_discard_sectors + = layer->deviceConfig->maxDiscardBlocks * VDO_SECTORS_PER_BLOCK; + limits->discard_granularity = VDO_BLOCK_SIZE; #if LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0) limits->discard_zeroes_data = 1; @@ -586,19 +586,16 @@ static int vdoInitialize(struct dm_target *ti, uint64_t logicalSize = to_bytes(ti->len); BlockCount logicalBlocks = logicalSize / blockSize; - logDebug("Logical block size = %" PRIu64, + logDebug("Logical block size = %" PRIu64, (uint64_t) config->logicalBlockSize); - logDebug("Logical blocks = %" PRIu64, logicalBlocks); - logDebug("Physical block size = %" PRIu64, (uint64_t) blockSize); - logDebug("Physical blocks = %" PRIu64, config->physicalBlocks); - logDebug("Block map cache blocks = %u", config->cacheSize); - logDebug("Block map maximum age = %u", config->blockMapMaximumAge); - logDebug("MD RAID5 mode = %s", (config->mdRaid5ModeEnabled - ? "on" : "off")); - logDebug("Read cache mode = %s", (config->readCacheEnabled - ? 
"enabled" : "disabled")); - logDebug("Read cache extra blocks = %u", config->readCacheExtraBlocks); - logDebug("Write policy = %s", getConfigWritePolicyString(config)); + logDebug("Logical blocks = %" PRIu64, logicalBlocks); + logDebug("Physical block size = %" PRIu64, (uint64_t) blockSize); + logDebug("Physical blocks = %" PRIu64, config->physicalBlocks); + logDebug("Block map cache blocks = %u", config->cacheSize); + logDebug("Block map maximum age = %u", config->blockMapMaximumAge); + logDebug("MD RAID5 mode = %s", (config->mdRaid5ModeEnabled + ? "on" : "off")); + logDebug("Write policy = %s", getConfigWritePolicyString(config)); // The threadConfig will be copied by the VDO if it's successfully // created. @@ -793,14 +790,13 @@ static int vdoPreresume(struct dm_target *ti) if (result != VDO_SUCCESS) { logErrorWithStringError(result, "Commit of modifications to device '%s'" " failed", config->poolName); - return result; - } - setKernelLayerActiveConfig(layer, config); - - result = resumeKernelLayer(layer); - if (result != VDO_SUCCESS) { - logError("resume of device '%s' failed with error: %d", - layer->deviceConfig->poolName, result); + } else { + setKernelLayerActiveConfig(layer, config); + result = resumeKernelLayer(layer); + if (result != VDO_SUCCESS) { + logError("resume of device '%s' failed with error: %d", + layer->deviceConfig->poolName, result); + } } unregisterThreadDeviceID(); return mapToSystemError(result); diff --git a/vdo/kernel/dump.c b/vdo/kernel/dump.c index 1eea4ff7..f72ef5b0 100644 --- a/vdo/kernel/dump.c +++ b/vdo/kernel/dump.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dump.c#1 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/dump.c#2 $ */ #include "dump.h" @@ -31,8 +31,8 @@ #include "dedupeIndex.h" #include "histogram.h" +#include "ioSubmitter.h" #include "logger.h" -#include "readCache.h" enum dumpOptions { // WorkQueues @@ -44,7 +44,6 @@ enum dumpOptions { // MemoryPools SHOW_VIO_POOL, // Others - SHOW_READCACHES, SHOW_VDO_STATUS, // This one means an option overrides the "default" choices, instead // of altering them. @@ -61,7 +60,6 @@ enum dumpOptionFlags { // MemoryPools FLAG_SHOW_VIO_POOL = (1 << SHOW_VIO_POOL), // Others - FLAG_SHOW_READCACHES = (1 << SHOW_READCACHES), FLAG_SHOW_VDO_STATUS = (1 << SHOW_VDO_STATUS), // Special FLAG_SKIP_DEFAULT = (1 << SKIP_DEFAULT) @@ -116,9 +114,6 @@ static void doDump(KernelLayer *layer, (dumpOptionsRequested & FLAG_SHOW_ALBIREO_QUEUE) != 0); dumpBufferPool(layer->dataKVIOPool, (dumpOptionsRequested & FLAG_SHOW_VIO_POOL) != 0); - if ((dumpOptionsRequested & FLAG_SHOW_READCACHES) != 0) { - readCacheDump(getIOSubmitterReadCache(layer->ioSubmitter), true, false); - } if ((dumpOptionsRequested & FLAG_SHOW_VDO_STATUS) != 0) { // Options should become more fine-grained when we have more to // display here. 
@@ -157,8 +152,6 @@ static int parseDumpOptions(unsigned int argc, { "reqq", FLAG_SKIP_DEFAULT | FLAG_SHOW_REQUEST_QUEUE }, { "viopool", FLAG_SKIP_DEFAULT | FLAG_SHOW_VIO_POOL }, { "vdo", FLAG_SKIP_DEFAULT | FLAG_SHOW_VDO_STATUS }, - { "readcache", FLAG_SKIP_DEFAULT | FLAG_SHOW_READCACHES }, - { "readcaches", FLAG_SKIP_DEFAULT | FLAG_SHOW_READCACHES }, { "pools", FLAG_SKIP_DEFAULT | FLAGS_ALL_POOLS }, { "queues", FLAG_SKIP_DEFAULT | FLAGS_ALL_QUEUES }, diff --git a/vdo/kernel/ioSubmitter.c b/vdo/kernel/ioSubmitter.c index 7e07062c..66a2dee9 100644 --- a/vdo/kernel/ioSubmitter.c +++ b/vdo/kernel/ioSubmitter.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/ioSubmitter.c#4 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/ioSubmitter.c#5 $ */ #include "ioSubmitterInternals.h" @@ -24,9 +24,9 @@ #include "memoryAlloc.h" #include "bio.h" +#include "dataKVIO.h" #include "kernelLayer.h" #include "logger.h" -#include "readCache.h" enum { /* @@ -489,7 +489,6 @@ int makeIOSubmitter(const char *threadNamePrefix, unsigned int threadCount, unsigned int rotationInterval, unsigned int maxRequestsActive, - unsigned int readCacheBlocks, KernelLayer *layer, IOSubmitter **ioSubmitterPtr) { @@ -503,15 +502,6 @@ int makeIOSubmitter(const char *threadNamePrefix, return result; } - if (readCacheBlocks > 0) { - result = makeReadCache(layer, readCacheBlocks, threadCount, - &ioSubmitter->readCache); - if (result != VDO_SUCCESS) { - FREE(ioSubmitter); - return result; - } - } - // Setup for each bio-submission work queue char queueName[MAX_QUEUE_NAME_LEN]; ioSubmitter->bioQueueRotationInterval = rotationInterval; @@ -584,17 +574,9 @@ void freeIOSubmitter(IOSubmitter *ioSubmitter) freeIntMap(&ioSubmitter->bioQueueData[i].map); } } - freeReadCache(&ioSubmitter->readCache); FREE(ioSubmitter); } -/**********************************************************************/ -void getBioWorkQueueReadCacheStats(IOSubmitter *ioSubmitter, - ReadCacheStats *totalledStats) -{ - *totalledStats = readCacheGetStats(ioSubmitter->readCache); -} - /**********************************************************************/ void dumpBioWorkQueue(IOSubmitter *ioSubmitter) { @@ -604,12 +586,6 @@ void dumpBioWorkQueue(IOSubmitter *ioSubmitter) } -/**********************************************************************/ -ReadCache *getIOSubmitterReadCache(IOSubmitter *ioSubmitter) -{ - return ioSubmitter->readCache; -} - /**********************************************************************/ void enqueueByPBNBioWorkItem(IOSubmitter *ioSubmitter, PhysicalBlockNumber pbn, diff --git a/vdo/kernel/ioSubmitter.h b/vdo/kernel/ioSubmitter.h index 63371124..4af2702f 100644 --- a/vdo/kernel/ioSubmitter.h +++ b/vdo/kernel/ioSubmitter.h @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
* - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/ioSubmitter.h#2 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/ioSubmitter.h#3 $ */ #ifndef IOSUBMITTER_H @@ -75,7 +75,6 @@ void completeAsyncBio(BIO *bio, int error); * bio-submission threads when enqueuing work * items * @param [in] maxRequestsActive Number of bios for merge tracking - * @param [in] readCacheBlocks Number of read cache blocks * @param [in] layer The kernel layer * @param [out] ioSubmitter Pointer to the new data structure * @@ -85,7 +84,6 @@ int makeIOSubmitter(const char *threadNamePrefix, unsigned int threadCount, unsigned int rotationInterval, unsigned int maxRequestsActive, - unsigned int readCacheBlocks, KernelLayer *layer, IOSubmitter **ioSubmitter); @@ -107,16 +105,6 @@ void cleanupIOSubmitter(IOSubmitter *ioSubmitter); **/ void freeIOSubmitter(IOSubmitter *ioSubmitter); -/** - * Retrieve the aggregated read cache statistics for each bio submission - * work queue. - * - * @param [in] ioSubmitter The I/O submitter data - * @param [out] totalledStats Where to store the statistics - **/ -void getBioWorkQueueReadCacheStats(IOSubmitter *ioSubmitter, - ReadCacheStats *totalledStats); - /** * Dump info to the kernel log about the work queue used by the * physical layer. For debugging only. @@ -138,15 +126,6 @@ void dumpBioWorkQueue(IOSubmitter *ioSubmitter); **/ void enqueueBioWorkItem(IOSubmitter *ioSubmitter, KvdoWorkItem *workItem); -/** - * Get the read cache used by the I/O submitter - * - * @param ioSubmitter The I/O submitter data - * - * @return read cache - **/ -ReadCache *getIOSubmitterReadCache(IOSubmitter *ioSubmitter); - /** * Submit bio but don't block. * diff --git a/vdo/kernel/ioSubmitterInternals.h b/vdo/kernel/ioSubmitterInternals.h index 1562e6e3..f79504bd 100644 --- a/vdo/kernel/ioSubmitterInternals.h +++ b/vdo/kernel/ioSubmitterInternals.h @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/ioSubmitterInternals.h#1 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/ioSubmitterInternals.h#2 $ */ #ifndef IOSUBMITTERINTERNALS_H @@ -28,7 +28,6 @@ #endif #include "ioSubmitter.h" -#include "readCache.h" /* * Submission of bio operations to the underlying storage device will @@ -65,7 +64,6 @@ struct ioSubmitter { unsigned int numBioQueuesUsed; unsigned int bioQueueRotationInterval; unsigned int bioQueueRotor; - ReadCache *readCache; BioQueueData bioQueueData[]; }; diff --git a/vdo/kernel/kernelLayer.c b/vdo/kernel/kernelLayer.c index dd44556d..71468f66 100644 --- a/vdo/kernel/kernelLayer.c +++ b/vdo/kernel/kernelLayer.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
* - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/kernelLayer.c#18 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/kernelLayer.c#21 $ */ #include "kernelLayer.h" @@ -45,7 +45,6 @@ #include "kvdoFlush.h" #include "kvio.h" #include "poolSysfs.h" -#include "readCache.h" #include "statusProcfs.h" #include "stringUtils.h" #include "verify.h" @@ -727,13 +726,11 @@ int makeKernelLayer(uint64_t startingSector, return result; } - config->threadCounts.baseThreads = (*threadConfigPointer)->baseThreadCount; - logInfo("zones: %d logical, %d physical, %d hash; base threads: %d", config->threadCounts.logicalZones, config->threadCounts.physicalZones, config->threadCounts.hashZones, - config->threadCounts.baseThreads); + (*threadConfigPointer)->baseThreadCount); result = makeBatchProcessor(layer, returnDataKVIOBatchToPool, layer, &layer->dataKVIOReleaser); @@ -847,15 +844,11 @@ int makeKernelLayer(uint64_t startingSector, setKernelLayerState(layer, LAYER_REQUEST_QUEUE_INITIALIZED); - // Bio queue and read cache - unsigned int readCacheBlocks - = (config->readCacheEnabled - ? (requestLimit + config->readCacheExtraBlocks) : 0); + // Bio queue result = makeIOSubmitter(layer->threadNamePrefix, config->threadCounts.bioThreads, config->threadCounts.bioRotationInterval, layer->requestLimiter.limit, - readCacheBlocks, layer, &layer->ioSubmitter); if (result != VDO_SUCCESS) { @@ -934,18 +927,8 @@ int prepareToModifyKernelLayer(KernelLayer *layer, return VDO_PARAMETER_MISMATCH; } - if (config->readCacheEnabled != extantConfig->readCacheEnabled) { - *errorPtr = "Read cache enabled cannot change"; - return VDO_PARAMETER_MISMATCH; - } - - if (config->readCacheExtraBlocks != extantConfig->readCacheExtraBlocks) { - *errorPtr = "Read cache size cannot change"; - return VDO_PARAMETER_MISMATCH; - } - - if (strcmp(config->threadConfigString, extantConfig->threadConfigString) - != 0) { + if (memcmp(&config->threadCounts, &extantConfig->threadCounts, + sizeof(ThreadCountConfig)) != 0) { *errorPtr = "Thread configuration cannot change"; return VDO_PARAMETER_MISMATCH; } diff --git a/vdo/kernel/kernelStatistics.h b/vdo/kernel/kernelStatistics.h index 993eea60..a95ed06f 100644 --- a/vdo/kernel/kernelStatistics.h +++ b/vdo/kernel/kernelStatistics.h @@ -36,16 +36,6 @@ typedef struct { uint64_t fua; } BioStats; -/** The statistics for the read cache. */ -typedef struct { - /** Number of times the read cache was asked for a specific pbn. */ - uint64_t accesses; - /** Number of times the read cache found the requested pbn. */ - uint64_t hits; - /** Number of times the found requested pbn had valid data. */ - uint64_t dataHits; -} ReadCacheStats; - typedef struct { /** Tracked bytes currently allocated. */ uint64_t bytesUsed; @@ -111,8 +101,6 @@ typedef struct { BioStats biosAcknowledgedPartial; /** Current number of bios in progress */ BioStats biosInProgress; - /** The read cache stats. */ - ReadCacheStats readCache; /** Memory usage stats. */ MemoryUsage memoryUsage; /** The statistics for the UDS index */ diff --git a/vdo/kernel/kernelTypes.h b/vdo/kernel/kernelTypes.h index c0775f17..c4944005 100644 --- a/vdo/kernel/kernelTypes.h +++ b/vdo/kernel/kernelTypes.h @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
* - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/kernelTypes.h#1 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/kernelTypes.h#2 $ */ #ifndef KERNEL_TYPES_H @@ -51,8 +51,6 @@ typedef struct kvdoFlush KVDOFlush; typedef struct kvdoWorkItem KvdoWorkItem; typedef struct kvdoWorkQueue KvdoWorkQueue; typedef struct kvio KVIO; -typedef struct readCache ReadCache; -typedef struct readCacheEntry ReadCacheEntry; typedef void (*KVIOCallback)(KVIO *kvio); typedef void (*DataKVIOCallback)(DataKVIO *dataKVIO); diff --git a/vdo/kernel/kvio.c b/vdo/kernel/kvio.c index 289e705d..35929da3 100644 --- a/vdo/kernel/kvio.c +++ b/vdo/kernel/kvio.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/kvio.c#3 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/kvio.c#4 $ */ #include "kvio.h" @@ -32,7 +32,6 @@ #include "bio.h" #include "ioSubmitter.h" #include "kvdoFlush.h" -#include "readCache.h" /** * A function to tell vdo that we have completed the requested async @@ -114,7 +113,7 @@ void kvdoWriteCompressedBlock(AllocatingVIO *allocatingVIO) resetBio(bio, kvio->layer); setBioOperationWrite(bio); setBioSector(bio, blockToSector(kvio->layer, kvio->vio->physical)); - invalidateCacheAndSubmitBio(kvio, BIO_Q_ACTION_COMPRESSED_DATA); + submitBio(bio, BIO_Q_ACTION_COMPRESSED_DATA); } /** diff --git a/vdo/kernel/poolSysfsStats.c b/vdo/kernel/poolSysfsStats.c index dbbc6109..d6c0bf01 100644 --- a/vdo/kernel/poolSysfsStats.c +++ b/vdo/kernel/poolSysfsStats.c @@ -2293,57 +2293,6 @@ static PoolStatsAttribute poolStatsBiosInProgressFuaAttr = { .show = poolStatsBiosInProgressFuaShow, }; -/**********************************************************************/ -/** Number of times the read cache was asked for a specific pbn. */ -static ssize_t poolStatsReadCacheAccessesShow(KernelLayer *layer, char *buf) -{ - ssize_t retval; - mutex_lock(&layer->statsMutex); - getKernelStats(layer, &layer->kernelStatsStorage); - retval = sprintf(buf, "%" PRIu64 "\n", layer->kernelStatsStorage.readCache.accesses); - mutex_unlock(&layer->statsMutex); - return retval; -} - -static PoolStatsAttribute poolStatsReadCacheAccessesAttr = { - .attr = { .name = "read_cache_accesses", .mode = 0444, }, - .show = poolStatsReadCacheAccessesShow, -}; - -/**********************************************************************/ -/** Number of times the read cache found the requested pbn. */ -static ssize_t poolStatsReadCacheHitsShow(KernelLayer *layer, char *buf) -{ - ssize_t retval; - mutex_lock(&layer->statsMutex); - getKernelStats(layer, &layer->kernelStatsStorage); - retval = sprintf(buf, "%" PRIu64 "\n", layer->kernelStatsStorage.readCache.hits); - mutex_unlock(&layer->statsMutex); - return retval; -} - -static PoolStatsAttribute poolStatsReadCacheHitsAttr = { - .attr = { .name = "read_cache_hits", .mode = 0444, }, - .show = poolStatsReadCacheHitsShow, -}; - -/**********************************************************************/ -/** Number of times the found requested pbn had valid data. 
*/ -static ssize_t poolStatsReadCacheDataHitsShow(KernelLayer *layer, char *buf) -{ - ssize_t retval; - mutex_lock(&layer->statsMutex); - getKernelStats(layer, &layer->kernelStatsStorage); - retval = sprintf(buf, "%" PRIu64 "\n", layer->kernelStatsStorage.readCache.dataHits); - mutex_unlock(&layer->statsMutex); - return retval; -} - -static PoolStatsAttribute poolStatsReadCacheDataHitsAttr = { - .attr = { .name = "read_cache_data_hits", .mode = 0444, }, - .show = poolStatsReadCacheDataHitsShow, -}; - /**********************************************************************/ /** Tracked bytes currently allocated. */ static ssize_t poolStatsMemoryUsageBytesUsedShow(KernelLayer *layer, char *buf) @@ -2698,9 +2647,6 @@ struct attribute *poolStatsAttrs[] = { &poolStatsBiosInProgressDiscardAttr.attr, &poolStatsBiosInProgressFlushAttr.attr, &poolStatsBiosInProgressFuaAttr.attr, - &poolStatsReadCacheAccessesAttr.attr, - &poolStatsReadCacheHitsAttr.attr, - &poolStatsReadCacheDataHitsAttr.attr, &poolStatsMemoryUsageBytesUsedAttr.attr, &poolStatsMemoryUsagePeakBytesUsedAttr.attr, &poolStatsMemoryUsageBiosUsedAttr.attr, diff --git a/vdo/kernel/readCache.c b/vdo/kernel/readCache.c deleted file mode 100644 index 7f7368ba..00000000 --- a/vdo/kernel/readCache.c +++ /dev/null @@ -1,1069 +0,0 @@ -/* - * Copyright (c) 2018 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - * - */ -#include "readCache.h" - -#include "logger.h" -#include "memoryAlloc.h" - -#include "atomic.h" -#include "compressedBlock.h" -#include "intMap.h" -#include "lz4.h" -#include "statusCodes.h" -#include "waitQueue.h" - -#include "bio.h" -#include "dataKVIO.h" -#include "histogram.h" -#include "ioSubmitterInternals.h" - -/* - * Zoned read-cache implementation. - * - * The read cache holds data matching certain physical blocks on the - * target storage device. The address space is subdivided into zones - * associated with different threads. - * - * We cannot queue up VIOs here that want cache slots that aren't - * available yet. VIOs that come to us looking for cache slots can be - * holding PBN locks at the time, and (in other cases) VIOs we've - * already given cache slots to can later go attempt to acquire PBN - * locks. We might be able to do queueing as long as each zone has at - * least (more than?) maxRequests/2 slots, but it's probably better to - * make each zone capable of handling all the requests at one time, - * expending the extra megabytes (in multi-zone configurations), - * keeping the cache code a little simpler, and avoiding the whole - * deadlock question. 
- */ - -static const PhysicalBlockNumber INVALID_BLOCK = (PhysicalBlockNumber) -1; - -typedef enum { - RC_FREE, // entry is free - RC_RECLAIMABLE, // entry is reclaimable - RC_IN_USE, // in use but no physical block number - RC_IN_USE_PBN, // in use, has physical block number - RC_MAX_STATES -} CacheEntryState; - -typedef struct readCacheEntryRelease { - KvdoWorkItem workItem; - Atomic32 releases; -} ReadCacheEntryRelease; - -typedef struct readCacheZone ReadCacheZone; - -struct readCacheEntry { - struct list_head list; - int entryNum; - bool dataValid; - Atomic32 refCount; - uint64_t hits; - PhysicalBlockNumber pbn; - char *dataBlock; - int error; - BIO *bio; - ReadCacheZone *zone; - spinlock_t waitLock; - WaitQueue callbackWaiters; - ReadCacheEntryRelease release; -}; - -struct readCacheZone { - ReadCacheStats stats; - KernelLayer *layer; - unsigned int numEntries; - struct list_head busyList; - struct list_head reclaimList; - struct list_head freeList; - IntMap *pbnMap; - ReadCacheEntry **blockMap; - char *dataBlocks; -}; - -struct readCache { - KernelLayer *layer; - unsigned int zoneCount; - ReadCacheZone *zones[]; -}; - -static void dumpReadCacheEntry(char tag, ReadCacheEntry *entry); - -/**********************************************************************/ -static inline uint32_t getCacheEntryRefCount(ReadCacheZone *zone, - ReadCacheEntry *cacheEntry) -{ - return relaxedLoad32(&cacheEntry->refCount); -} - -/**********************************************************************/ -static inline uint32_t addToCacheEntryRefCount(ReadCacheZone *zone, - ReadCacheEntry *cacheEntry, - int32_t delta) -{ - return relaxedAdd32(&cacheEntry->refCount, delta); -} - -/**********************************************************************/ -static inline void setCacheEntryRefCount(ReadCacheZone *zone, - ReadCacheEntry *cacheEntry, - uint32_t newValue) -{ - relaxedStore32(&cacheEntry->refCount, newValue); -} - -/** - * Logs the contents of a ReadCacheEntry at info level, for debugging. - * - * @param label Arbitrary label for the log message - * @param cacheEntry The cache entry to log - */ -static inline void logCacheEntry(const char *label, ReadCacheEntry *cacheEntry) -{ - if (cacheEntry == NULL) { - logInfo("%s: cacheEntry=NULL", label); - return; - } - - uint32_t refCount = relaxedLoad32(&cacheEntry->refCount); - if (cacheEntry->pbn == INVALID_BLOCK) { - logInfo("%s: entryNum=%d refCount=%d pbn=INVALID", - label, cacheEntry->entryNum, refCount); - } else { - logInfo("%s: entryNum=%d refCount=%d pbn=%" PRIu64, - label, cacheEntry->entryNum, refCount, cacheEntry->pbn); - } -} - -/** - * Returns the state of a ReadCacheEntry. - * - * Can only be called with appropriate synchronization. - * - * @param cacheEntry The cache entry - * - * @return The entry's state - */ -static inline CacheEntryState getState(ReadCacheEntry *cacheEntry) -{ - if (relaxedLoad32(&cacheEntry->refCount) == 0) { - return (cacheEntry->pbn == INVALID_BLOCK) ? RC_FREE : RC_RECLAIMABLE; - } else { - return (cacheEntry->pbn == INVALID_BLOCK) ? RC_IN_USE : RC_IN_USE_PBN; - } -} - -/** - * Given a ReadCacheEntry, decrement its reference count, moving it - * to the free or reclaim list if the reference count reaches 0. 
- * - * @param cacheEntry The cache entry - */ -static void releaseBlockInternal(ReadCacheEntry *cacheEntry) -{ - ReadCacheZone *zone = cacheEntry->zone; - if (ASSERT(getCacheEntryRefCount(zone, cacheEntry) > 0, - "freeing in-use block")) { - return; - } - if (addToCacheEntryRefCount(zone, cacheEntry, -1) == 0) { - if (getCacheEntryRefCount(zone, cacheEntry) == 0) { - if (cacheEntry->pbn != INVALID_BLOCK) { - list_move_tail(&cacheEntry->list, &zone->reclaimList); - } else { - list_move_tail(&cacheEntry->list, &zone->freeList); - } - } - } -} - -/**********************************************************************/ -static void assertRunningInRCQueueForPBN(ReadCache *cache, - PhysicalBlockNumber pbn) -{ - assertRunningInBioQueueForPBN(pbn); -} - -/** - * Return a pointer to the requested zone. - * - * @param readCache The read cache - * @param index The zone number - * - * @return The zone pointer - **/ -static ReadCacheZone *getReadCacheZone(ReadCache *readCache, - unsigned int index) -{ - BUG_ON(index >= readCache->zoneCount); - return readCache->zones[index]; -} - -/**********************************************************************/ -static ReadCacheZone *zoneForPBN(ReadCache *cache, PhysicalBlockNumber pbn) -{ - unsigned int zone = bioQueueNumberForPBN(cache->layer->ioSubmitter, pbn); - return getReadCacheZone(cache, zone); -} - -/** - * Sets the physical block number of a cacheEntry and enters its - * mapping into the PBN map dependent on the replace flag. - * - * @param zone The read cache zone - * @param cacheEntry The cache entry - * @param pbn The physical block number - */ -static int setBlockPBNInternal(ReadCacheZone *zone, - ReadCacheEntry *cacheEntry, - PhysicalBlockNumber pbn) -{ - assertRunningInRCQueueForPBN(zone->layer->ioSubmitter->readCache, pbn); - - int result = intMapPut(zone->pbnMap, pbn, cacheEntry, true, NULL); - if (result != VDO_SUCCESS) { - return result; - } - cacheEntry->pbn = pbn; - return VDO_SUCCESS; -} - -/**********************************************************************/ -static ReadCacheEntry *findBlockForReadInternal(ReadCacheZone *zone, - PhysicalBlockNumber pbn) -{ - ACCESS_ONCE(zone->stats.accesses)++; - ReadCacheEntry *cacheEntry = intMapGet(zone->pbnMap, pbn); - if (cacheEntry != NULL) { - if (getState(cacheEntry) == RC_RECLAIMABLE) { - list_move_tail(&cacheEntry->list, &zone->busyList); - } - ACCESS_ONCE(zone->stats.hits)++; - if (cacheEntry->dataValid) { - ACCESS_ONCE(zone->stats.dataHits)++; - } - addToCacheEntryRefCount(zone, cacheEntry, 1); - cacheEntry->hits++; - } - return cacheEntry; -} - -/**********************************************************************/ -static ReadCacheStats readCacheZoneGetStats(ReadCacheZone *zone) -{ - if (zone == NULL) { - ReadCacheStats stats = { - .accesses = 0, - .dataHits = 0, - .hits = 0, - }; - return stats; - } else { - // N.B.: No locking is used, so the values fetched may be slightly - // out of sync. - ReadCacheStats stats = { - .accesses = ACCESS_ONCE(zone->stats.accesses), - .dataHits = ACCESS_ONCE(zone->stats.dataHits), - .hits = ACCESS_ONCE(zone->stats.hits), - }; - return stats; - } -} - -/**********************************************************************/ -ReadCacheStats readCacheGetStats(ReadCache *readCache) -{ - ReadCacheStats totalledStats; - - // Sum the read cache stats. 
- totalledStats.accesses = 0; - totalledStats.dataHits = 0; - totalledStats.hits = 0; - if (readCache == NULL) { - return totalledStats; - } - - for (unsigned int i = 0; i < readCache->zoneCount; i++) { - ReadCacheStats stats = readCacheZoneGetStats(readCache->zones[i]); - totalledStats.accesses += stats.accesses; - totalledStats.dataHits += stats.dataHits; - totalledStats.hits += stats.hits; - } - - return totalledStats; -} - -/**********************************************************************/ -static int getScratchBlockInternal(ReadCacheZone *zone, - ReadCacheEntry **cacheEntryPtr) -{ - ReadCacheEntry *cacheEntry; - if (list_empty(&zone->freeList)) { - if (unlikely(list_empty(&zone->reclaimList))) { - ASSERT_LOG_ONLY(false, - "read cache has free scratch blocks"); - return VDO_READ_CACHE_BUSY; - } - cacheEntry = list_first_entry(&zone->reclaimList, - ReadCacheEntry, list); - intMapRemove(zone->pbnMap, cacheEntry->pbn); - cacheEntry->pbn = INVALID_BLOCK; - ASSERT_LOG_ONLY(relaxedLoad32(&cacheEntry->refCount) == 0, - "reclaim block has zero refcount"); - } else { - cacheEntry = list_first_entry(&zone->freeList, ReadCacheEntry, - list); - ASSERT_LOG_ONLY(relaxedLoad32(&cacheEntry->refCount) == 0, - "free block has zero refcount"); - } - list_move(&cacheEntry->list, &zone->busyList); - setCacheEntryRefCount(zone, cacheEntry, 1); - cacheEntry->hits = 0; - cacheEntry->dataValid = false; - ASSERT_LOG_ONLY(cacheEntry->pbn == INVALID_BLOCK, - "returned block has no pbn"); - *cacheEntryPtr = cacheEntry; - return VDO_SUCCESS; -} - -/**********************************************************************/ -static int allocateBlockForReadInternal(ReadCacheZone *zone, - PhysicalBlockNumber pbn, - ReadCacheEntry **cacheEntryPtr) -{ - assertRunningInRCQueueForPBN(zone->layer->ioSubmitter->readCache, pbn); - - ReadCacheEntry *cacheEntry = findBlockForReadInternal(zone, pbn); - if (cacheEntry == NULL) { - int result = getScratchBlockInternal(zone, &cacheEntry); - if (result != VDO_SUCCESS) { - return result; - } - setBlockPBNInternal(zone, cacheEntry, pbn); - } - *cacheEntryPtr = cacheEntry; - return VDO_SUCCESS; -} - -/** - * Uncompress the data that's just been read or fetched from the cache - * and then call back the requesting DataKVIO. - * - * @param workItem The DataKVIO requesting the data - **/ -static void uncompressReadBlock(KvdoWorkItem *workItem) -{ - DataKVIO *dataKVIO = workItemAsDataKVIO(workItem); - ReadBlock *readBlock = &dataKVIO->readBlock; - BlockSize blockSize = VDO_BLOCK_SIZE; - - // The DataKVIO's scratch block will be used to contain the - // uncompressed data. - uint16_t fragmentOffset, fragmentSize; - char *compressedData = readBlock->data; - int result = getCompressedBlockFragment(readBlock->mappingState, - compressedData, blockSize, - &fragmentOffset, - &fragmentSize); - if (result != VDO_SUCCESS) { - logDebug("%s: frag err %d", __func__, result); - readBlock->status = result; - readBlock->callback(dataKVIO); - return; - } - - char *fragment = compressedData + fragmentOffset; - int size = LZ4_uncompress_unknownOutputSize(fragment, dataKVIO->scratchBlock, - fragmentSize, blockSize); - if (size == blockSize) { - readBlock->data = dataKVIO->scratchBlock; - } else { - logDebug("%s: lz4 error", __func__); - readBlock->status = VDO_INVALID_FRAGMENT; - } - - readBlock->callback(dataKVIO); -} - -/** - * Now that we have gotten the data, either from the cache or storage, - * uncompress the data if necessary and then call back the requesting DataKVIO. 
- * - * @param dataKVIO The DataKVIO requesting the data - * @param result The result of the read operation - **/ -static void completeRead(DataKVIO *dataKVIO, int result) -{ - ReadBlock *readBlock = &dataKVIO->readBlock; - readBlock->status = result; - - if ((result == VDO_SUCCESS) && isCompressed(readBlock->mappingState)) { - launchDataKVIOOnCPUQueue(dataKVIO, uncompressReadBlock, NULL, - CPU_Q_ACTION_COMPRESS_BLOCK); - return; - } - - readBlock->callback(dataKVIO); -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) -/** - * Callback for a bio doing a read with no cache. - * - * @param bio The bio - */ -static void readBioCallback(BIO *bio) -#else -/** - * Callback for a bio doing a read with no cache. - * - * @param bio The bio - * @param result The result of the read operation - */ -static void readBioCallback(BIO *bio, int result) -#endif -{ - KVIO *kvio = (KVIO *) bio->bi_private; - DataKVIO *dataKVIO = kvioAsDataKVIO(kvio); - dataKVIO->readBlock.data = dataKVIO->readBlock.buffer; - dataKVIOAddTraceRecord(dataKVIO, THIS_LOCATION(NULL)); - countCompletedBios(bio); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) - completeRead(dataKVIO, getBioResult(bio)); -#else - completeRead(dataKVIO, result); -#endif -} - -/** - * Callback for all waiters on a cache block read. - * - * @param waiter The waiter - * @param context The context passed to callback - */ -static void cacheBlockReadWaiterCallback(Waiter *waiter, void *context) -{ - completeRead(dataVIOAsDataKVIO(waiterAsDataVIO(waiter)), *((int *) context)); -} - -/**********************************************************************/ -static void readBlockCompletionWork(KvdoWorkItem *item) -{ - DataKVIO *dataKVIO = workItemAsDataKVIO(item); - dataKVIOAddTraceRecord(dataKVIO, THIS_LOCATION(NULL)); - - int error = dataKVIO->readBlock.status; - ReadCacheEntry *cacheEntry = dataKVIO->readBlock.cacheEntry; - - // We're going to have a single callback here, since there is now no - // difference between success and failure. - spin_lock(&cacheEntry->waitLock); - cacheEntry->dataValid = (error == 0); - notifyAllWaiters(&cacheEntry->callbackWaiters, - cacheBlockReadWaiterCallback, &error); - spin_unlock(&cacheEntry->waitLock); -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) -/** - * Callback for a bio which did a read to populate a cache entry. - * - * @param bio The bio - */ -static void readCacheBioCallback(BIO *bio) -#else -/** - * Callback for a bio which did a read to populate a cache entry. - * - * @param bio The bio - * @param error The result of the read operation - */ -static void readCacheBioCallback(BIO *bio, int error) -#endif -{ - KVIO *kvio = (KVIO *) bio->bi_private; - DataKVIO *dataKVIO = kvioAsDataKVIO(kvio); - dataKVIOAddTraceRecord(dataKVIO, THIS_LOCATION(NULL)); - countCompletedBios(bio); - - // Set read block operation back to nothing so bio counting works -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) - dataKVIO->readBlock.status = getBioResult(bio); -#else - dataKVIO->readBlock.status = error; -#endif - enqueueDataKVIO(dataKVIO, readBlockCompletionWork, NULL, - REQ_Q_ACTION_VIO_CALLBACK); -} - -/** - * Removes any entry for the specified physical block number from - * the read cache's known PBNs. 
- * - * @param readCache The read cache - * @param pbn The physical block number - **/ -static void readCacheInvalidatePBN(ReadCache *readCache, - PhysicalBlockNumber pbn) -{ - if (ASSERT(readCache != NULL, "specified read cache")) { - return; - } - - assertRunningInRCQueueForPBN(readCache, pbn); - - ReadCacheZone *zone = zoneForPBN(readCache, pbn); - intMapRemove(zone->pbnMap, pbn); -} - -/**********************************************************************/ -static void invalidatePBNAndContinueVIO(KvdoWorkItem *item) -{ - KVIO *kvio = workItemAsKVIO(item); - // readCacheInvalidatePBN will check that we're on the correct queue. - readCacheInvalidatePBN(kvio->layer->ioSubmitter->readCache, - kvio->vio->physical); - kvdoEnqueueVIOCallback(kvio); -} - -/** - * Run a read cache-related work action in the appropriate work queue. - * The caller surrenders ownership of the work item object. - * - * Callers working with KVIOs probably should use - * runReadCacheActionOnKVIO instead. - * - * @param layer The kernel layer - * @param pbn The physical block number - * @param workItem The work item to enqueue - **/ -static void runReadCacheWorkItem(KernelLayer *layer, - PhysicalBlockNumber pbn, - KvdoWorkItem *workItem) -{ - // The work item is likely *not* a KVIO, so no I/O will happen, so - // don't get involved with the bio map code. - enqueueByPBNBioWorkItem(layer->ioSubmitter, pbn, workItem); -} - -/**********************************************************************/ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) -static void invalidatePBNBioCallback(BIO *bio) -#else -static void invalidatePBNBioCallback(BIO *bio, int error) -#endif -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) - int error = getBioResult(bio); -#endif - - bio->bi_end_io = completeAsyncBio; - - KVIO *kvio = (KVIO *) bio->bi_private; - kvioAddTraceRecord(kvio, THIS_LOCATION("$F($io);cb=io($io)")); - countCompletedBios(bio); - if (unlikely(error)) { - setCompletionResult(vioAsCompletion(kvio->vio), error); - } - - setupKVIOWork(kvio, invalidatePBNAndContinueVIO, NULL, - BIO_Q_ACTION_READCACHE); - runReadCacheWorkItem(kvio->layer, kvio->vio->physical, - &kvio->enqueueable.workItem); -} - -/**********************************************************************/ -void invalidateCacheAndSubmitBio(KVIO *kvio, BioQAction action) -{ - BIO *bio = kvio->bio; - BUG_ON(bio->bi_end_io != completeAsyncBio); - if (kvio->layer->ioSubmitter->readCache != NULL) { - bio->bi_end_io = invalidatePBNBioCallback; - } - BUG_ON(bio->bi_private != kvio); - submitBio(bio, action); -} - -/** - * Free a ReadCacheZone. - * - * @param readCacheZonePtr The ReadCacheZone to free - **/ -static void freeReadCacheZone(ReadCacheZone **readCacheZonePtr) -{ - if (*readCacheZonePtr == NULL) { - return; - } - - ReadCacheZone *zone = *readCacheZonePtr; - - // At shutdown, all entries should have refcount 0, and none should - // be "busy". 
- ReadCacheEntry *cacheEntry; - ReadCacheEntry *temp; - list_for_each_entry_safe(cacheEntry, temp, &zone->freeList, list) { - unsigned int refCount = getCacheEntryRefCount(zone, cacheEntry); - ASSERT_LOG_ONLY(refCount == 0, - "refcount (%u) of 'free' cache entry %" PRIptr " is 0", - refCount, cacheEntry); - freeBio(cacheEntry->bio, zone->layer); - FREE(cacheEntry); - } - bool first = true; // avoid redundant verbosity for same error - list_for_each_entry_safe(cacheEntry, temp, &zone->reclaimList, list) { - unsigned int refCount = getCacheEntryRefCount(zone, cacheEntry); - if (refCount != 0) { - if (first) { - ASSERT_LOG_ONLY(refCount == 0, - "refcount (%u) of 'reclaimable' cache entry %" PRIptr - " is 0", - refCount, cacheEntry); - first = false; - } - // Just one line per entry - dumpReadCacheEntry('R', cacheEntry); - } - freeBio(cacheEntry->bio, zone->layer); - FREE(cacheEntry); - } - ASSERT_LOG_ONLY(list_empty(&zone->busyList), - "'busy' cache entry list is empty at shutdown"); - list_for_each_entry_safe(cacheEntry, temp, &zone->busyList, list) { - dumpReadCacheEntry('B', cacheEntry); - freeBio(cacheEntry->bio, zone->layer); - FREE(cacheEntry); - } - freeIntMap(&zone->pbnMap); - FREE(zone->dataBlocks); - FREE(zone->blockMap); - FREE(zone); - - *readCacheZonePtr = NULL; -} - -/**********************************************************************/ -void freeReadCache(ReadCache **readCachePtr) -{ - ReadCache *readCache = *readCachePtr; - if (readCache == NULL) { - return; - } - *readCachePtr = NULL; - - for (unsigned int i = 0; i < readCache->zoneCount; i++) { - freeReadCacheZone(&readCache->zones[i]); - } - FREE(readCache); -} - -/** - * Allocate and initialize a ReadCacheZone. - * - * @param [in] layer The associated kernel layer - * @param [in] zoneNumber The zone number - * @param [in] numEntries The size of the read cache - * @param [out] readCacheZonePtr The new ReadCacheZone - * - * @return success or an error code - **/ -static int makeReadCacheZone(KernelLayer *layer, - unsigned int zoneNumber, - unsigned int numEntries, - ReadCacheZone **readCacheZonePtr) -{ - ReadCacheZone *zone; - int result = ALLOCATE(1, ReadCacheZone, "read cache zone", &zone); - if (result != VDO_SUCCESS) { - return result; - } - zone->numEntries = numEntries; - result = ALLOCATE(((size_t)zone->numEntries * (size_t)VDO_BLOCK_SIZE), - char, "read cache data", &zone->dataBlocks); - if (result != VDO_SUCCESS) { - FREE(zone); - return result; - } - result = ALLOCATE(zone->numEntries, ReadCacheEntry *, - "read cache block map", &zone->blockMap); - if (result != VDO_SUCCESS) { - FREE(zone->dataBlocks); - FREE(zone); - return result; - } - INIT_LIST_HEAD(&zone->busyList); - INIT_LIST_HEAD(&zone->reclaimList); - INIT_LIST_HEAD(&zone->freeList); - result = makeIntMap(zone->numEntries, 0, &zone->pbnMap); - if (result != VDO_SUCCESS) { - FREE(zone->dataBlocks); - FREE(zone); - return result; - } - - zone->layer = layer; - for (int i = 0; i < zone->numEntries; i++) { - ReadCacheEntry *cacheEntry; - result = ALLOCATE(1, ReadCacheEntry, "read cache entry", &cacheEntry); - if (result != VDO_SUCCESS) { - freeReadCacheZone(&zone); - return result; - } - cacheEntry->pbn = INVALID_BLOCK; - cacheEntry->dataBlock = zone->dataBlocks + ((uint64_t)i * VDO_BLOCK_SIZE); - result = createBio(zone->layer, cacheEntry->dataBlock, &cacheEntry->bio); - if (result != VDO_SUCCESS) { - FREE(cacheEntry); - freeReadCacheZone(&zone); - return result; - } - spin_lock_init(&cacheEntry->waitLock); - cacheEntry->zone = zone; - cacheEntry->entryNum 
= i; - zone->blockMap[i] = cacheEntry; - list_add(&cacheEntry->list, &zone->freeList); - } - - *readCacheZonePtr = zone; - - return VDO_SUCCESS; -} - -/**********************************************************************/ -int makeReadCache(KernelLayer *layer, - unsigned int numEntries, - unsigned int zoneCount, - ReadCache **readCachePtr) -{ - int result; - - ReadCache *readCache; - result = ALLOCATE_EXTENDED(ReadCache, zoneCount, ReadCacheZone *, - "read cache", &readCache); - if (result != VDO_SUCCESS) { - return result; - } - readCache->layer = layer; - readCache->zoneCount = zoneCount; - for (unsigned int i = 0; i < zoneCount; i++) { - result = makeReadCacheZone(layer, i, numEntries, &readCache->zones[i]); - if (result != VDO_SUCCESS) { - freeReadCache(&readCache); - return result; - } - } - *readCachePtr = readCache; - return VDO_SUCCESS; -} - -/**********************************************************************/ -static void readCacheZoneReleaseBlockWork(KvdoWorkItem *item) -{ - ReadCacheEntryRelease *release - = container_of(item, ReadCacheEntryRelease, workItem); - ReadCacheEntry *cacheEntry = container_of(release, ReadCacheEntry, release); - - while (atomicAdd32(&cacheEntry->release.releases, -1) > 0) { - releaseBlockInternal(cacheEntry); - } - releaseBlockInternal(cacheEntry); -} - -/** - * Returns a block addressed by the given physical block number from - * the cache if possible. If the block is not found in the cache, - * issues a read to the underlying device into a free block in the - * cache. Finally calls the callback provided in the DataKVIO's ReadBlock's - * callback. - * - * The physical block number and priority must already be set in the - * ReadBlock's fields as well. (The only caller of this function - * is kvdoReadBlock() which will have set those fields.) - * - * If an error occurs along the way, ReadBlock.status is set and - * some operations may be skipped, but the callback is still invoked. 
- * - * @param item The DataKVIO - **/ -static void readCacheBlockCallback(KvdoWorkItem *item) -{ - DataKVIO *dataKVIO = workItemAsDataKVIO(item); - ReadBlock *readBlock = &dataKVIO->readBlock; - KernelLayer *layer = getLayerFromDataKVIO(dataKVIO); - ReadCache *readCache = layer->ioSubmitter->readCache; - ReadCacheZone *zone = zoneForPBN(readCache, readBlock->pbn); - - if (ASSERT(zone != NULL, "specified read cache")) { - completeRead(dataKVIO, VDO_BAD_CONFIGURATION); - return; - } - - assertRunningInRCQueueForPBN(readCache, readBlock->pbn); - - ReadCacheEntry *cacheEntry = NULL; - int result = allocateBlockForReadInternal(zone, readBlock->pbn, &cacheEntry); - if (result != VDO_SUCCESS) { - completeRead(dataKVIO, result); - return; - } - - readBlock->cacheEntry = cacheEntry; - readBlock->data = cacheEntry->dataBlock; - - spin_lock(&cacheEntry->waitLock); - if (cacheEntry->dataValid) { - spin_unlock(&cacheEntry->waitLock); - completeRead(dataKVIO, VDO_SUCCESS); - return; - } - - bool issueIO = !hasWaiters(&cacheEntry->callbackWaiters); - result = enqueueDataVIO(&cacheEntry->callbackWaiters, &dataKVIO->dataVIO, - THIS_LOCATION("$F($io)")); - spin_unlock(&cacheEntry->waitLock); - - if (result != VDO_SUCCESS) { - completeRead(dataKVIO, result); - return; - } - - if (!issueIO) { - return; - } - - BIO *bio = cacheEntry->bio; - resetBio(bio, layer); - setBioSector(bio, blockToSector(layer, readBlock->pbn)); - bio->bi_end_io = readCacheBioCallback; - bio->bi_private = &dataKVIO->kvio; - - logDebug("%s: submitting read request for pbn %" PRIu64, __func__, - readBlock->pbn); - - sendBioToDevice(dataKVIOAsKVIO(dataKVIO), bio, THIS_LOCATION("$F($io)")); -} - -/**********************************************************************/ -void kvdoReadBlock(DataVIO *dataVIO, - PhysicalBlockNumber location, - BlockMappingState mappingState, - ReadBlockOperation operation, - DataKVIOCallback callback) -{ - dataVIOAddTraceRecord(dataVIO, THIS_LOCATION(NULL)); - - BioQAction action = BIO_Q_ACTION_DATA; - switch (operation) { - case READ_NO_OPERATION: - logError("unexpected ReadBlockOperation: %d", operation); - break; - - case READ_COMPRESSED_DATA: - action = BIO_Q_ACTION_COMPRESSED_DATA; - break; - - case READ_VERIFY_DEDUPE: - action = BIO_Q_ACTION_VERIFY; - break; - - default: - logError("undefined ReadBlockOperation: %d", operation); - break; - } - - DataKVIO *dataKVIO = dataVIOAsDataKVIO(dataVIO); - ReadBlock *readBlock = &dataKVIO->readBlock; - KernelLayer *layer = getLayerFromDataKVIO(dataKVIO); - runReadCacheReleaseBlock(layer, readBlock); - - readBlock->pbn = location; - readBlock->callback = callback; - readBlock->status = VDO_SUCCESS; - readBlock->mappingState = mappingState; - readBlock->action = action; - - BUG_ON(getBIOFromDataKVIO(dataKVIO)->bi_private != &dataKVIO->kvio); - if (readBlock->bio != NULL) { - // Read the data directly from the device using the read bio. - BIO *bio = readBlock->bio; - resetBio(bio, layer); - setBioSector(bio, blockToSector(layer, location)); - setBioOperationRead(bio); - bio->bi_end_io = readBioCallback; - submitBio(bio, action); - return; - } - - // Feed operations through the bio map to encourage sequential - // order in case we need to actually fetch the data. 
- enqueueBioMap(getBIOFromDataKVIO(dataKVIO), action, readCacheBlockCallback, - location); -} - -/**********************************************************************/ -void runReadCacheReleaseBlock(KernelLayer *layer, ReadBlock *readBlock) -{ - readBlock->data = NULL; - if (readBlock->cacheEntry == NULL) { - return; - } - - ReadCacheZone *zone = zoneForPBN(layer->ioSubmitter->readCache, - readBlock->pbn); - if (ASSERT(zone != NULL, "pbn maps to read cache")) { - return; - } - - if (atomicAdd32(&readBlock->cacheEntry->release.releases, 1) == 1) { - KvdoWorkItem *workItem = &readBlock->cacheEntry->release.workItem; - setupWorkItem(workItem, readCacheZoneReleaseBlockWork, NULL, - BIO_Q_ACTION_HIGH); - runReadCacheWorkItem(layer, readBlock->pbn, workItem); - } - - readBlock->cacheEntry = NULL; -} - -/**********************************************************************/ -static void dumpReadCacheEntry(char tag, ReadCacheEntry *entry) -{ - /* - * We may be logging a couple thousand of these lines, and in some - * circumstances syslogd may have trouble keeping up, so keep it - * BRIEF rather than user-friendly. - */ - spin_lock(&entry->waitLock); - - Waiter *first = getFirstWaiter(&entry->callbackWaiters); - char *maybeWaiters = (first != NULL) ? " waiters" : ""; - - /* - * Message format: - * - * #num B(usy)/R(ecl)/F(ree)[I(nvalid)] Refcount PBN @addr [err##] - * - * error==0 is the common case by far, so it's worth omitting it. - */ - uint32_t refCount = relaxedLoad32(&entry->refCount); - if (entry->error == 0) { - logInfo(" #%d %c%s R%u P%" PRIu64 " @%" PRIptr "%s", - entry->entryNum, tag, - entry->dataValid ? "" : "I", - refCount, entry->pbn, - entry->dataBlock, - maybeWaiters); - } else { - logInfo(" #%d %c%s R%u P%" PRIu64 " @%" PRIptr " err%d %s", - entry->entryNum, tag, - entry->dataValid ? "" : "I", - refCount, entry->pbn, - entry->dataBlock, entry->error, - maybeWaiters); - } - - if (first != NULL) { - Waiter *waiter = first; - do { - DataVIO *dataVIO = waiterAsDataVIO(waiter); - /* - * If we knew whether we were dumping all the VIOs too, maybe we - * could skip logging details of each waiter here. 
- */ - logInfo(" DataVIO %" PRIptr " P%" PRIu64 " L%" PRIu64 " D%" PRIu64 - " op %s", - dataVIO, dataVIO->mapped.pbn, dataVIO->logical.lbn, - dataVIO->duplicate.pbn, getOperationName(dataVIO)); - waiter = waiter->nextWaiter; - } while (waiter != first); - } - - spin_unlock(&entry->waitLock); -} - -/**********************************************************************/ -static void readCacheZoneDump(ReadCacheZone *zone, - bool dumpBusyElements, - bool dumpAllElements) -{ - if (ASSERT(zone != NULL, "specified read cache")) { - return; - } - - ReadCacheStats stats = readCacheZoneGetStats(zone); - logInfo("Read cache %" PRIptr ":" - " %" PRIu64 " accesses %" PRIu64 " hits %" PRIu64 " data hits" - " %u entries", - zone, stats.accesses, stats.hits, stats.dataHits, - zone->numEntries); - - unsigned int numFreeItems = 0; - unsigned int numReclaimItems = 0; - unsigned int numBusyItems = 0; - - for (int i = 0; i < zone->numEntries; i++) { - CacheEntryState state = getState(zone->blockMap[i]); - switch (state) { - case RC_FREE: - numFreeItems++; - if (dumpAllElements) { - dumpReadCacheEntry('F', zone->blockMap[i]); - } - break; - - case RC_IN_USE: - case RC_IN_USE_PBN: - numBusyItems++; - if (dumpBusyElements || dumpAllElements) { - dumpReadCacheEntry('B', zone->blockMap[i]); - } - break; - - case RC_RECLAIMABLE: - numReclaimItems++; - if (dumpAllElements) { - dumpReadCacheEntry('R', zone->blockMap[i]); - } - break; - - default: - ASSERT_LOG_ONLY(false, - "cache entry state (%d) among expected values", state); - break; - } - } - - logInfo("Read cache %" PRIptr ": %u free %u reclaimable %u busy", - zone, numFreeItems, numReclaimItems, numBusyItems); -} - -/**********************************************************************/ -void readCacheDump(ReadCache *readCache, - bool dumpBusyElements, - bool dumpAllElements) -{ - if (readCache == NULL) { - return; - } - - for (int i = 0; i < readCache->zoneCount; i++) { - logInfo("Read cache zone %d:", i); - readCacheZoneDump(readCache->zones[i], - dumpBusyElements, dumpAllElements); - } -} diff --git a/vdo/kernel/readCache.h b/vdo/kernel/readCache.h deleted file mode 100644 index 0bacb587..00000000 --- a/vdo/kernel/readCache.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2018 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/readCache.h#1 $ - */ -#ifndef READCACHE_H -#define READCACHE_H - -#include "dataKVIO.h" -#include "ioSubmitter.h" -#include "kernelLayer.h" - -typedef enum readBlockOperation { - READ_NO_OPERATION = 0, - READ_COMPRESSED_DATA, - READ_VERIFY_DEDUPE, -} ReadBlockOperation; - -/** - * Allocate and initialize a ReadCache. 
- * - * @param [in] layer The associated kernel layer - * @param [in] numEntries The number of read cache entries per zone - * @param [in] zoneCount The number of zones to create - * @param [out] readCachePtr The new ReadCache - * - * @return success or an error code - **/ -int makeReadCache(KernelLayer *layer, - unsigned int numEntries, - unsigned int zoneCount, - ReadCache **readCachePtr); - -/** - * Free a ReadCache. - * - * @param readCachePtr The ReadCache to free - **/ -void freeReadCache(ReadCache **readCachePtr); - -/** - * Will fetch the data for a block, either from the cache or from storage. The - * fetched data will be uncompressed when the callback is called, at which - * point the result of the read operation will be in the DataKVIO's ReadBlock's - * status field, and, on success, the data will be in the ReadBlock's buffer. - * - * @param dataVIO The DataVIO to read a block in for - * @param location The physical block number to read from - * @param mappingState The mapping state of the block to read - * @param operation The read block operation to perform - * @param callback The function to call when the read is done - **/ -void kvdoReadBlock(DataVIO *dataVIO, - PhysicalBlockNumber location, - BlockMappingState mappingState, - ReadBlockOperation operation, - DataKVIOCallback callback); - -/** - * Releases a block in a read cache, possibly scheduling the work to be done in - * a different thread associated with the specified pbn. The caller surrenders - * use of the block at the time of the call. - * - * @param layer The kernel layer - * @param readBlock The ReadBlock to release - **/ -void runReadCacheReleaseBlock(KernelLayer *layer, ReadBlock *readBlock); - -/** - * Returns the operational statistics of the read cache. - * - * @param readCache The read cache - * - * @return ReadCacheStats - **/ -ReadCacheStats readCacheGetStats(ReadCache *readCache); - -/** - * Dump the read cache to the log. - * - * @param readCache The read cache - * @param dumpBusyElements True for list of busy cache entries - * @param dumpAllElements True for complete output - **/ -void readCacheDump(ReadCache *readCache, - bool dumpBusyElements, - bool dumpAllElements); - -/** - * Invalidate any read cache entries corresponding to the physical - * address in the KVIO, and submit kvio->bio to the device. Some or - * all of the actual work may be performed in another thread; the - * caller surrenders control of the KVIO. - * - * After the I/O operation completes, the kvio->bio->bi_end_io - * callback is invoked. - * - * @param kvio The KVIO with the bio to be submitted - * @param action The work queue action code to prioritize processing - **/ -void invalidateCacheAndSubmitBio(KVIO *kvio, BioQAction action); - -#endif /* READCACHE_H */ diff --git a/vdo/kernel/statusProcfs.c b/vdo/kernel/statusProcfs.c index 6aaebb8e..abb6a831 100644 --- a/vdo/kernel/statusProcfs.c +++ b/vdo/kernel/statusProcfs.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
* - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/statusProcfs.c#3 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/statusProcfs.c#4 $ * * Proc filesystem interface to the old GET_DEDUPE_STATS and * GET_KERNEL_STATS ioctls, which can no longer be supported in 4.4 @@ -141,7 +141,6 @@ void getKernelStats(KernelLayer *layer, KernelStatistics *stats) &layer->biosAcknowledgedPartial); stats->biosInProgress = subtractBioStats(stats->biosIn, stats->biosAcknowledged); - getBioWorkQueueReadCacheStats(layer->ioSubmitter, &stats->readCache); stats->memoryUsage = getMemoryUsage(); getIndexStatistics(layer->dedupeIndex, &stats->index); } diff --git a/vdo/kernel/sysfs.c b/vdo/kernel/sysfs.c index 490a9f12..c075556c 100644 --- a/vdo/kernel/sysfs.c +++ b/vdo/kernel/sysfs.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/sysfs.c#3 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/sysfs.c#4 $ */ #include "sysfs.h" @@ -28,7 +28,6 @@ #include "dmvdo.h" #include "logger.h" -extern unsigned int maxDiscardSectors; extern int defaultMaxRequestsActive; typedef struct vdoAttribute { @@ -189,14 +188,6 @@ static ssize_t vdoMaxReqActiveStore(struct kvdoDevice *device, return scanInt(buf, n, &defaultMaxRequestsActive, 1, MAXIMUM_USER_VIOS); } -/**********************************************************************/ -static ssize_t vdoMaxDiscardSectors(struct kvdoDevice *device, - const char *buf, - size_t n) -{ - return scanUInt(buf, n, &maxDiscardSectors, 8, UINT_MAX); -} - /**********************************************************************/ static ssize_t vdoAlbireoTimeoutIntervalStore(struct kvdoDevice *device, const char *buf, @@ -278,13 +269,6 @@ static VDOAttribute vdoMaxReqActiveAttr = { .valuePtr = &defaultMaxRequestsActive, }; -static VDOAttribute vdoMaxDiscardSectorsAttr = { - .attr = {.name = "max_discard_sectors", .mode = 0644, }, - .show = showUInt, - .store = vdoMaxDiscardSectors, - .valuePtr = &maxDiscardSectors, -}; - static VDOAttribute vdoAlbireoTimeoutInterval = { .attr = {.name = "deduplication_timeout_interval", .mode = 0644, }, .show = showUInt, @@ -315,7 +299,6 @@ static struct attribute *defaultAttrs[] = { &vdoStatusAttr.attr, &vdoLogLevelAttr.attr, &vdoMaxReqActiveAttr.attr, - &vdoMaxDiscardSectorsAttr.attr, &vdoAlbireoTimeoutInterval.attr, &vdoMinAlbireoTimerInterval.attr, &vdoTraceRecording.attr, diff --git a/vdo/kernel/verify.c b/vdo/kernel/verify.c index ec2f0fa5..54efd4ec 100644 --- a/vdo/kernel/verify.c +++ b/vdo/kernel/verify.c @@ -16,7 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * - * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/verify.c#1 $ + * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/verify.c#3 $ */ #include "verify.h" @@ -25,7 +25,6 @@ #include "dataKVIO.h" #include "numeric.h" -#include "readCache.h" /** * Compare blocks of memory for equality. @@ -140,7 +139,7 @@ void kvdoVerifyDuplication(DataVIO *dataVIO) = THIS_LOCATION("verifyDuplication;dup=update(verify);io=verify"); dataVIOAddTraceRecord(dataVIO, location); kvdoReadBlock(dataVIO, dataVIO->duplicate.pbn, dataVIO->duplicate.state, - READ_VERIFY_DEDUPE, verifyReadBlockCallback); + BIO_Q_ACTION_VERIFY, verifyReadBlockCallback); } /**********************************************************************/
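Note on the final verify.c hunk: kvdoVerifyDuplication() now passes a BioQAction
(BIO_Q_ACTION_VERIFY) to kvdoReadBlock instead of the ReadBlockOperation enum,
which was removed together with readCache.h. The updated declaration belongs to
the dataKVIO.h changes, which are outside this excerpt; the following is only a
sketch of the assumed post-patch prototype, shown next to the call site above so
the parameter change is easier to follow.

    /*
     * Assumed post-patch prototype (expected to live in dataKVIO.h in this
     * release): with the read cache gone, the caller selects the bio queue
     * action directly rather than passing a ReadBlockOperation for the read
     * cache code to translate.
     */
    void kvdoReadBlock(DataVIO             *dataVIO,
                       PhysicalBlockNumber  location,
                       BlockMappingState    mappingState,
                       BioQAction           action,
                       DataKVIOCallback     callback);

    /* Matching call site, as it appears in kvdoVerifyDuplication() above. */
    kvdoReadBlock(dataVIO, dataVIO->duplicate.pbn, dataVIO->duplicate.state,
                  BIO_Q_ACTION_VERIFY, verifyReadBlockCallback);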