From 8ae0df29caad29ef7384b5d9c790b3bc5b3df3e3 Mon Sep 17 00:00:00 2001 From: Dinesh <97143739+dinesh-aot@users.noreply.github.com> Date: Fri, 2 Jun 2023 09:41:15 -0700 Subject: [PATCH] Performance optimization (#714) * Keycloak user migration with role mappings * document search performance and search optimization in general (#687) * change in the replaceAll function usage (#691) --- api/aggregators/documentAggregator.js | 10 +- api/aggregators/searchAggregator.js | 107 +++------------- api/controllers/search.js | 21 ++-- api/helpers/aggregators.js | 118 ++++++++++++++++++ app.js | 1 + migrations/20190703105100-addProjectPhase.js | 2 +- .../kc_migration/custom_realm_users/script.js | 3 +- package.json | 1 + 8 files changed, 160 insertions(+), 103 deletions(-) diff --git a/api/aggregators/documentAggregator.js b/api/aggregators/documentAggregator.js index 2edf9e14..ed42913d 100644 --- a/api/aggregators/documentAggregator.js +++ b/api/aggregators/documentAggregator.js @@ -26,7 +26,7 @@ exports.createMatchAggr = async (schemaName, projectId, keywords, caseSensitive, } if (keywords) { - keywordModifier = { $text: { $search: keywords, $caseSensitive: caseSensitive } }; + keywordModifier = { $text: { $search: "\""+keywords+"\"", $caseSensitive: caseSensitive} }; } // query modifiers @@ -142,7 +142,7 @@ exports.createMatchAggr = async (schemaName, projectId, keywords, caseSensitive, * @param {array} roles Set of user roles * @returns {array} Aggregate for documents. */ -exports.createDocumentAggr = (populate, roles) => { +exports.createDocumentAggr = (populate, roles, sortingValue, sortField, sortDirection, pageNum, pageSize) => { let aggregation = []; // Allow documents to be sorted by status based on publish existence @@ -185,9 +185,11 @@ exports.createDocumentAggr = (populate, roles) => { } }); } + var sortAggregation = aggregateHelper.createSortingPagingAggr('Document', sortingValue, sortField, sortDirection, pageNum, pageSize); + aggregation = [...aggregation, ...sortAggregation]; if (populate) { - // Handle project. + //Handle project. aggregation.push( { '$lookup': { @@ -199,7 +201,7 @@ exports.createDocumentAggr = (populate, roles) => { }, { '$addFields': { - project: '$project', + 'project': '$project', } }, { diff --git a/api/aggregators/searchAggregator.js b/api/aggregators/searchAggregator.js index 551f4934..49a978e2 100644 --- a/api/aggregators/searchAggregator.js +++ b/api/aggregators/searchAggregator.js @@ -26,7 +26,8 @@ exports.createMatchAggr = async (schemaName, projectId, keywords, caseSensitive, } if (keywords) { - let keywordSearch = fuzzy && !keywords.startsWith("\"") && !keywords.endsWith("\"") ? fuzzySearch.createFuzzySearchString(keywords, 4, caseSensitive) : keywords; + keywords = keywords.replace(/"/g,"").trim(); + let keywordSearch = fuzzy && !keywords.startsWith("\"") && !keywords.endsWith("\"") ? fuzzySearch.createFuzzySearchString(keywords, 4, caseSensitive) : "\""+ keywords +"\""; keywordModifier = { $text: { $search: keywordSearch, $caseSensitive: caseSensitive } }; } @@ -124,94 +125,24 @@ exports.createKeywordRegexAggr = function(decodedKeywords, schemaName) { return keywordRegexFilter; }; -/** - * Create an aggregation that sets the sorting and paging for a query. - * - * @param {string} schemaName Schema being searched on - * @param {array} sortValues Values to sort by - * @param {string} sortField Single field to sort by - * @param {number} sortDirection Direction of sort - * @param {number} pageNum Page number to offset results by - * @param {number} pageSize Result set size - * - * @returns {array} Aggregation of sorting and paging - */ -exports.createSortingPagingAggr = function(schemaName, sortValues, sortField, sortDirection, pageNum, pageSize) { - const searchResultAggregation = []; - let datePostedHandlingTruncating = false; - if (sortField && sortValues !=null && typeof sortValues != "undefined" && sortField.includes(",") || Object.keys(sortValues).length > 1){ - //sort will have multiple values passed - if (sortField.includes("datePosted") || Object.prototype.hasOwnProperty.call(sortValues, "datePosted")){ - //datePosted is too specfic(in it's time) and needs the truncated form of date, can be expanded if other dates are required to be truncated - let tempSortValues = { }; - for (let property in sortValues){ - if (Object.prototype.hasOwnProperty.call(sortValues, property)) { - if (property === "datePosted"){ - tempSortValues['date'] = sortValues[property]; - } else { - tempSortValues[property] = sortValues[property]; - } - } - } - sortValues = tempSortValues; - datePostedHandlingTruncating = true; - } - - } else { - // if sortField is null, this would create a broken sort, so ignore it if its null - if(sortField && sortValues && sortValues[sortField]) { - sortValues[sortField] = sortDirection; - } - } - - // if we have no sorting going on, we should sort by the score - if(!sortField) { - sortValues = { score: -1 }; - } - // We don't want to have sort in the aggregation if the front end doesn't need sort. - if (sortField && sortDirection) { - if(datePostedHandlingTruncating){ - // Currently this is just handling datePosted, if more date variables are needed change datePosted to a variable and detect it above - searchResultAggregation.push( - - { $addFields: { - 'date': - { $dateToString: { - 'format': '%Y-%m-%d', 'date': '$datePosted' - }} - - }}, - { $sort: sortValues } - ); - } else { - searchResultAggregation.push( - { - $sort: sortValues - } - ); - } - } - - searchResultAggregation.push( +exports.createResultAggregator = function () { + return [ { - $skip: pageNum * pageSize - }, - { - $limit: pageSize - }, - ); - - const combinedAggregation = [{ - $facet: { - searchResults: searchResultAggregation, - meta: [ - { - $count: 'searchResultsTotal' - } - ] + $facet: { + searchResults: [{ + $match: {} + }], + meta: [ + { $limit: 1 }, + { + $addFields: { + "searchResultsTotal": "$totalCount" + } + }, + { $project: { "searchResultsTotal": 1, "_id": 0 } } + ] + } } - }]; - - return combinedAggregation; + ]; }; diff --git a/api/controllers/search.js b/api/controllers/search.js index ce81cae9..2922e338 100644 --- a/api/controllers/search.js +++ b/api/controllers/search.js @@ -15,6 +15,7 @@ const inspectionAggregator = require('../aggregators/inspectionAggregator'); const notificationProjectAggregator = require('../aggregators/notificationProjectAggregator'); const itemAggregator = require('../aggregators/itemAggregator'); const searchAggregator = require('../aggregators/searchAggregator'); +const aggregateHelper = require('../helpers/aggregators'); const searchCollection = async function (roles, keywords, schemaName, pageNum, pageSize, project, projectLegislation, sortField = undefined, sortDirection = undefined, caseSensitive, populate = false, and, or, sortingValue, categorized, fuzzy) { const aggregateCollation = { @@ -34,7 +35,7 @@ const searchCollection = async function (roles, keywords, schemaName, pageNum, p switch (schemaName) { case constants.DOCUMENT: matchAggregation = await documentAggregator.createMatchAggr(schemaName, project, decodedKeywords, caseSensitive, or, and, categorized, roles, fuzzy); - schemaAggregation = documentAggregator.createDocumentAggr(populate, roles,); + schemaAggregation = documentAggregator.createDocumentAggr(populate, roles, sortingValue, sortField, sortDirection, pageNum, pageSize); break; case constants.PROJECT: matchAggregation = await searchAggregator.createMatchAggr(schemaName, project, decodedKeywords, caseSensitive, or, and, roles, fuzzy); @@ -87,17 +88,19 @@ const searchCollection = async function (roles, keywords, schemaName, pageNum, p } // keyword regex - let keywordRegexFilter = !fuzzy && decodedKeywords ? searchAggregator.createKeywordRegexAggr(decodedKeywords, schemaName) : []; + let keywordRegexFilter = [];//!fuzzy && decodedKeywords ? searchAggregator.createKeywordRegexAggr(decodedKeywords, schemaName) : []; // Create the sorting and paging aggregations. - const sortingPagingAggr = searchAggregator.createSortingPagingAggr(schemaName, sortingValue, sortField, sortDirection, pageNum, pageSize); + // For Document schema, the sorting and pagination pipelines have already been added for performance purpose + const resultAggr = (schemaName === constants.DOCUMENT?searchAggregator.createResultAggregator(): + aggregateHelper.createSortingPagingAggr(schemaName, sortingValue, sortField, sortDirection, pageNum, pageSize)); // Combine all the aggregations. let aggregation; if (!schemaAggregation) { - aggregation = [...matchAggregation, ...keywordRegexFilter, ...sortingPagingAggr]; + aggregation = [...matchAggregation, ...keywordRegexFilter, ...resultAggr]; } else { - aggregation = [...matchAggregation, ...schemaAggregation, ...keywordRegexFilter, ...sortingPagingAggr]; + aggregation = [...matchAggregation, ...schemaAggregation, ...keywordRegexFilter, ...resultAggr]; } return new Promise(function (resolve, reject) { @@ -151,7 +154,7 @@ const executeQuery = async function (args, res) { return Actions.sendResponse(res, 400, { }); } - Utils.recordAction('Search', keywords, args.swagger.params.auth_payload ? args.swagger.params.auth_payload.preferred_username : 'public'); + await Utils.recordAction('Search', keywords, args.swagger.params.auth_payload ? args.swagger.params.auth_payload.preferred_username : 'public'); let sortDirection = undefined; let sortField = undefined; @@ -233,11 +236,11 @@ const executeQuery = async function (args, res) { /***** Exported functions *****/ exports.publicGet = async function (args, res) { - executeQuery(args, res); + await executeQuery(args, res); }; -exports.protectedGet = function (args, res) { - executeQuery(args, res); +exports.protectedGet = async function (args, res) { + await executeQuery(args, res); }; exports.protectedOptions = function (args, res) { diff --git a/api/helpers/aggregators.js b/api/helpers/aggregators.js index 7ae8bf40..56b6ba78 100644 --- a/api/helpers/aggregators.js +++ b/api/helpers/aggregators.js @@ -448,6 +448,123 @@ const isEmpty = (obj) => { return true; }; +/** + * Create an aggregation that sets the sorting and paging for a query. + * + * @param {string} schemaName Name of the schema + * @param {array} sortValues Values to sort by + * @param {string} sortField Single field to sort by + * @param {number} sortDirection Direction of sort + * @param {number} pageNum Page number to offset results by + * @param {number} pageSize Result set size + * + * @returns {array} Aggregation of sorting and paging + */ + const createSortingPagingAggr = function(schemaName, sortValues, sortField, sortDirection, pageNum, pageSize) { + const searchResultAggregation = []; + let datePostedHandlingTruncating = false; + if (sortField && sortValues !=null && typeof sortValues != "undefined" && sortField.includes(",") || Object.keys(sortValues).length > 1){ + //sort will have multiple values passed + if (sortField.includes("datePosted") || Object.prototype.hasOwnProperty.call(sortValues, "datePosted")){ + //datePosted is too specfic(in it's time) and needs the truncated form of date, can be expanded if other dates are required to be truncated + let tempSortValues = { }; + for (let property in sortValues){ + if (Object.prototype.hasOwnProperty.call(sortValues, property)) { + if (property === "datePosted"){ + tempSortValues['date'] = sortValues[property]; + } else { + tempSortValues[property] = sortValues[property]; + } + } + } + sortValues = tempSortValues; + datePostedHandlingTruncating = true; + } + + } else { + // if sortField is null, this would create a broken sort, so ignore it if its null + if(sortField && sortValues && sortValues[sortField]) { + sortValues[sortField] = sortDirection; + } + } + + // if we have no sorting going on, we should sort by the score + if(!sortField) { + sortValues = { score: -1 }; + } + + // We don't want to have sort in the aggregation if the front end doesn't need sort. + if (sortField && sortDirection) { + if(datePostedHandlingTruncating){ + // Currently this is just handling datePosted, if more date variables are needed change datePosted to a variable and detect it above + searchResultAggregation.push( + + { $addFields: { + 'date': + { $dateToString: { + 'format': '%Y-%m-%d', 'date': '$datePosted' + }} + + }}, + { $sort: sortValues } + ); + } else { + searchResultAggregation.push( + { + $sort: sortValues + } + ); + } + } + + searchResultAggregation.push( + { + $skip: pageNum * pageSize + }, + { + $limit: pageSize + }, + ); + + const combinedAggregation = [ + { + $facet: { + searchResults: searchResultAggregation, + meta: [ + { + $count: "searchResultsTotal" + } + ] + } + } + ]; + + // add a new field to store the totalCount which will later used to + // produce the searchResultsTotal in the final output + if(schemaName === constants.DOCUMENT) { + combinedAggregation.push({ + $addFields: { + 'searchResults.totalCount': { + $let: { + vars: { + item: {$arrayElemAt:["$meta",0]} + }, + in: "$$item.searchResultsTotal" + } + } + } + },{ + $unwind: { + path: '$searchResults' + } + },{ + $replaceRoot: {newRoot: '$searchResults'} + }); + } + + return combinedAggregation; +}; + // Exporting here so that the functions can be used in // this file and exported. exports.setProjectDefault = setProjectDefault; @@ -455,3 +572,4 @@ exports.unwindProjectData = unwindProjectData; exports.addProjectLookupAggrs = addProjectLookupAggrs; exports.generateExpArray = generateExpArray; exports.isEmpty = isEmpty; +exports.createSortingPagingAggr = createSortingPagingAggr; diff --git a/app.js b/app.js index d33e26d7..7fa6e808 100644 --- a/app.js +++ b/app.js @@ -1,5 +1,6 @@ 'use strict'; +require('dotenv').config(); var app = require('express')(); var fs = require('fs'); var uploadDir = process.env.UPLOAD_DIRECTORY || './uploads/'; diff --git a/migrations/20190703105100-addProjectPhase.js b/migrations/20190703105100-addProjectPhase.js index 3c8fd0bc..92492029 100644 --- a/migrations/20190703105100-addProjectPhase.js +++ b/migrations/20190703105100-addProjectPhase.js @@ -14,7 +14,7 @@ exports.setup = function(options, seedLink) { seed = seedLink; }; -let listItems = require(process.cwd() + '/migrations_data/20190703105100-new-projectPhases.js'); +let listItems = require(process.cwd() + '/migrations_data/lists/20190703105100-new-projectPhases.js'); exports.up = function(db) { let mClient; diff --git a/openshift/kc_migration/custom_realm_users/script.js b/openshift/kc_migration/custom_realm_users/script.js index 0f35dca7..546f1bf0 100644 --- a/openshift/kc_migration/custom_realm_users/script.js +++ b/openshift/kc_migration/custom_realm_users/script.js @@ -306,7 +306,8 @@ async function main() { } } -main(); +main().catch((err)=>console.error('Migration end with error', JSON.stringify(err))) +.then(()=>console.log('Migration Completed Successfully')); //returns user guid, please modify accordingly if required //by default it uses idp specific user attribute to fetch the guid diff --git a/package.json b/package.json index 0232f06a..93f6464c 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "csv": "~5.1.1", "db-migrate": "~0.11.4", "db-migrate-mongodb": "~1.5.0", + "dotenv": "^16.0.1", "epsg": "~0.5.0", "express": "~4.16.0", "flake-idgen": "~1.1.0",