diff --git a/config/default.json b/config/default.json index 4f2f48e..6b0633d 100644 --- a/config/default.json +++ b/config/default.json @@ -46,6 +46,12 @@ "NFS": { "pvPath": "/Path/To/Models" }, + "crawling": { + "extension": ".json", + "nestedJsonPath": "$.root..uri", + "ignoreNotFound": true, + "underlying": "NFS" + }, "ingestion": { "provider": "NFS", "blackList": ["tar", "zip", "rar", "7z"] diff --git a/config/test.json b/config/test.json index 81766b3..7008d53 100644 --- a/config/test.json +++ b/config/test.json @@ -1,4 +1,10 @@ { + "crawling": { + "extension": ".json", + "nestedJsonPath": "$.root..uri", + "ignoreNotFound": true, + "underlying": "NFS" + }, "S3": { "accessKeyId": "minioadmin", "secretAccessKey": "minioadmin", diff --git a/package-lock.json b/package-lock.json index 2fd760a..174723f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", + "jsonpath": "^1.1.1", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", @@ -49,6 +50,7 @@ "@types/config": "^3.3.0", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", + "@types/jsonpath": "^0.2.4", "@types/multer": "^1.4.7", "@types/n-readlines": "^1.0.3", "@types/supertest": "^2.0.12", @@ -10071,6 +10073,13 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/jsonpath": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/@types/jsonpath/-/jsonpath-0.2.4.tgz", + "integrity": "sha512-K3hxB8Blw0qgW6ExKgMbXQv2UPZBoE2GqLpVY+yr7nMD2Pq86lsuIzyAaiQ7eMqFL5B6di6pxSkogLJEyEHoGA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/keygrip": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/@types/keygrip/-/keygrip-1.0.6.tgz", @@ -13033,8 +13042,7 @@ "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", - "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", - "dev": true + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==" }, "node_modules/deepmerge": { "version": "4.3.1", @@ -13614,6 +13622,87 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/escodegen": { + "version": "1.14.3", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-1.14.3.tgz", + "integrity": "sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==", + "license": "BSD-2-Clause", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^4.2.0", + "esutils": "^2.0.2", + "optionator": "^0.8.1" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=4.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, + "node_modules/escodegen/node_modules/estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/escodegen/node_modules/levn": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", + "integrity": "sha512-0OO4y2iOHix2W6ujICbKIaEQXvFQHue65vUG3pb5EUomzPI90z9hsA1VsO/dbIIpC53J8gxM9Q4Oho0jrCM/yA==", + "license": "MIT", + "dependencies": { + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/escodegen/node_modules/optionator": { + "version": "0.8.3", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.8.3.tgz", + "integrity": "sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==", + "license": "MIT", + "dependencies": { + "deep-is": "~0.1.3", + "fast-levenshtein": "~2.0.6", + "levn": "~0.3.0", + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2", + "word-wrap": "~1.2.3" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/escodegen/node_modules/prelude-ls": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz", + "integrity": "sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==", + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/escodegen/node_modules/type-check": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz", + "integrity": "sha512-ZCmOJdvOWDBYJlzAoFkC+Q0+bUyEOS1ltgp1MGU03fqHG+dbi9tBFU2Rd9QKiDZFAYrhPh2JUf7rZRIuHRKtOg==", + "license": "MIT", + "dependencies": { + "prelude-ls": "~1.1.2" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/eslint": { "version": "8.53.0", "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.53.0.tgz", @@ -14546,7 +14635,6 @@ "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, "bin": { "esparse": "bin/esparse.js", "esvalidate": "bin/esvalidate.js" @@ -14592,7 +14680,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, "engines": { "node": ">=0.10.0" } @@ -14848,8 +14935,7 @@ "node_modules/fast-levenshtein": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", - "dev": true + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==" }, "node_modules/fast-redact": { "version": "3.3.0", @@ -17533,6 +17619,17 @@ "node >= 0.2.0" ] }, + "node_modules/jsonpath": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/jsonpath/-/jsonpath-1.1.1.tgz", + "integrity": "sha512-l6Cg7jRpixfbgoWgkrl77dgEj8RPvND0wMH6TwQmi9Qs4TFfS9u5cUFnbeKTwj5ga5Y3BTGGNI28k117LJ009w==", + "license": "MIT", + "dependencies": { + "esprima": "1.2.2", + "static-eval": "2.0.2", + "underscore": "1.12.1" + } + }, "node_modules/jsonpath-plus": { "version": "10.3.0", "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz", @@ -17552,6 +17649,18 @@ "node": ">=18.0.0" } }, + "node_modules/jsonpath/node_modules/esprima": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-1.2.2.tgz", + "integrity": "sha512-+JpPZam9w5DuJ3Q67SqsMGtiHKENSMRVoxvArfJZK01/BfLEObtZ6orJa/MtoGNR/rfMgp5837T41PAmTwAv/A==", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/jsonpointer": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", @@ -21323,6 +21432,15 @@ "node": ">=10" } }, + "node_modules/static-eval": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/static-eval/-/static-eval-2.0.2.tgz", + "integrity": "sha512-N/D219Hcr2bPjLxPiV+TQE++Tsmrady7TqAJugLy7Xk1EumfDWS/f5dtBbkRCGE7wKKXuYockQoj8Rm2/pVKyg==", + "license": "MIT", + "dependencies": { + "escodegen": "^1.8.1" + } + }, "node_modules/statuses": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", @@ -22248,6 +22366,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/underscore": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.12.1.tgz", + "integrity": "sha512-hEQt0+ZLDVUMhebKxL4x1BTtDY7bavVofhZ9KZ4aI26X9SRaE+Y3m83XUL1UP2jn8ynjndwCCpEHdUG+9pP1Tw==", + "license": "MIT" + }, "node_modules/undici": { "version": "6.21.3", "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz", @@ -22573,6 +22697,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/wordwrap": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", diff --git a/package.json b/package.json index cdba821..99a9bbc 100644 --- a/package.json +++ b/package.json @@ -48,12 +48,12 @@ "@map-colonies/error-express-handler": "^2.1.0", "@map-colonies/express-access-log-middleware": "^2.0.1", "@map-colonies/js-logger": "^1.0.1", - "@map-colonies/types": "^1.7.0", "@map-colonies/mc-model-types": "^19.0.0", "@map-colonies/mc-priority-queue": "^8.2.1", "@map-colonies/openapi-express-viewer": "^3.0.0", "@map-colonies/read-pkg": "0.0.1", "@map-colonies/telemetry": "^6.1.0", + "@map-colonies/types": "^1.7.0", "@opentelemetry/api": "1.7.0", "@opentelemetry/api-metrics": "0.23.0", "@opentelemetry/context-async-hooks": "^1.24.1", @@ -65,6 +65,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", + "jsonpath": "^1.1.1", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", @@ -76,12 +77,13 @@ "@faker-js/faker": "^8.4.1", "@map-colonies/eslint-config": "^4.0.0", "@map-colonies/prettier-config": "0.0.1", - "@redocly/openapi-cli": "^1.0.0-beta.94", "@redocly/cli": "^1.34.3", + "@redocly/openapi-cli": "^1.0.0-beta.94", "@types/compression": "^1.7.2", "@types/config": "^3.3.0", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", + "@types/jsonpath": "^0.2.4", "@types/multer": "^1.4.7", "@types/n-readlines": "^1.0.3", "@types/supertest": "^2.0.12", diff --git a/src/common/constants.ts b/src/common/constants.ts index 38e84f4..464a508 100644 --- a/src/common/constants.ts +++ b/src/common/constants.ts @@ -24,5 +24,6 @@ export const SERVICES: Record = { PROVIDER_CONFIG: Symbol('ProviderConfig'), QUEUE_FILE_HANDLER: Symbol('QueueFileHandler'), JOB_MANAGER_CLIENT: Symbol('JobManagerClient'), + UNDERLYING: Symbol('Underlying'), }; /* eslint-enable @typescript-eslint/naming-convention */ diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 273d0c1..4bfa8c6 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -29,8 +29,10 @@ export interface DeletePayload { producerName: string; } +// ToDo: merge this class with the identical class in file-syncer export interface Provider { streamModelPathsToQueueFile: (modelId: string, pathToTileset: string, productName: string) => Promise; + getFile: (filePath: string) => Promise; } export interface IngestionJobParameters { @@ -70,7 +72,14 @@ export interface NFSConfig { pvPath: string; } -export type ProviderConfig = S3Config | NFSConfig; +export interface CrawlingConfig { + extension: string; + nestedJsonPath: string; + ignoreNotFound?: boolean; + underlying?: string; +} + +export type ProviderConfig = S3Config | NFSConfig | CrawlingConfig; export interface JobOperationResponse { jobId: string; diff --git a/src/containerConfig.ts b/src/containerConfig.ts index 5a88d99..35f93ac 100644 --- a/src/containerConfig.ts +++ b/src/containerConfig.ts @@ -7,7 +7,6 @@ import jsLogger, { LoggerOptions } from '@map-colonies/js-logger'; import client from 'prom-client'; import { JobManagerClient } from '@map-colonies/mc-priority-queue'; import { SERVICES, SERVICE_NAME } from './common/constants'; -import { Provider, ProviderConfig } from './common/interfaces'; import { tracing } from './common/tracing'; import { jobOperationsRouterFactory, JOB_OPERATIONS_ROUTER_SYMBOL } from './jobOperations/routes/jobOperationsRouter'; import { InjectionObject, registerDependencies } from './common/dependencyRegistration'; @@ -62,18 +61,14 @@ export const registerExternalValues = (options?: RegisterOptions): DependencyCon { token: SERVICES.PROVIDER_CONFIG, provider: { - useFactory: (): ProviderConfig => { - return getProviderConfig(provider); - }, + useFactory: (container) => getProviderConfig(container), }, }, { token: SERVICES.QUEUE_FILE_HANDLER, provider: { useClass: QueueFileHandler } }, { token: SERVICES.PROVIDER, provider: { - useFactory: (): Provider => { - return getProvider(provider); - }, + useFactory: (container) => getProvider(provider, container), }, }, { diff --git a/src/providers/crawlingProvider.ts b/src/providers/crawlingProvider.ts new file mode 100644 index 0000000..d7a4a9b --- /dev/null +++ b/src/providers/crawlingProvider.ts @@ -0,0 +1,89 @@ +import Path from 'path'; +import { Logger } from '@map-colonies/js-logger'; +import { StatusCodes } from 'http-status-codes'; +import { inject, injectable } from 'tsyringe'; +import { Tracer } from '@opentelemetry/api'; +import { withSpanAsyncV4 } from '@map-colonies/telemetry'; +import jsonpath from 'jsonpath'; +import { QueueFileHandler } from '../handlers/queueFileHandler'; +import { AppError } from '../common/appError'; +import { SERVICES } from '../common/constants'; +import { CrawlingConfig, LogContext, Provider } from '../common/interfaces'; + +@injectable() +export class CrawlingProvider implements Provider { + private readonly logContext: LogContext; + + public constructor( + @inject(SERVICES.LOGGER) protected readonly logger: Logger, + @inject(SERVICES.TRACER) public readonly tracer: Tracer, + @inject(SERVICES.PROVIDER_CONFIG) protected readonly config: CrawlingConfig, + @inject(SERVICES.UNDERLYING) protected readonly underlying: Provider, + @inject(SERVICES.QUEUE_FILE_HANDLER) protected readonly queueFileHandler: QueueFileHandler + ) { + this.logContext = { + fileName: __filename, + class: CrawlingProvider.name, + }; + if (this.underlying instanceof CrawlingProvider) { + throw new AppError(StatusCodes.BAD_REQUEST, `Invalid config in provider: Do not nest crawling providers.`, false); + } + } + + @withSpanAsyncV4 + public async getFile(filePath: string): Promise { + return this.underlying.getFile(filePath); + } + + @withSpanAsyncV4 + public async streamModelPathsToQueueFile(modelId: string, path: string, modelName: string): Promise { + const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; + let buffer: Buffer; + try { + buffer = await this.underlying.getFile(path); + } catch (err) { + if (this.config.ignoreNotFound! && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { + this.logger.warn({ + msg: 'Found a non-existing file, but instructed to ignore. Skipping...', + logContext, + path, + modelId, + modelName, + }); + return 0; + } else { + throw err; + } + } + const fileContent = buffer.toString(); + let file: object = {}; + try { + file = JSON.parse(fileContent) as object; + } catch (err) { + if (err instanceof SyntaxError) { + this.logger.error({ + msg: 'File is not a valid JSON', + logContext, + path, + modelId, + modelName, + }); + throw new AppError(StatusCodes.NOT_ACCEPTABLE, 'File is not a valid JSON', false); + } else { + throw err; + } + } + + const nestedFiles = jsonpath.query(file, this.config.nestedJsonPath).map((child: string) => Path.resolve('/', Path.dirname(path), child)); + const leafs = nestedFiles.filter((path) => !path.endsWith(this.config.extension)); + const addedFilePromises = [...leafs, path].map(async (path) => { + await this.queueFileHandler.writeFileNameToQueueFile(modelId, path); + return 1; + }); + + const children = nestedFiles.filter((path) => path.endsWith(this.config.extension)); + const countPromises = children.map(async (path) => this.streamModelPathsToQueueFile(modelId, path, modelName)); + const counts = await Promise.all([...countPromises, ...addedFilePromises]); + return counts.reduce((a, b) => a + b); + } +} diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 01d796f..58dba51 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -1,27 +1,48 @@ import config from 'config'; import httpStatus from 'http-status-codes'; -import { container } from 'tsyringe'; +import { DependencyContainer } from 'tsyringe'; import { AppError } from '../common/appError'; -import { ProviderConfig } from '../common/interfaces'; +import { CrawlingConfig, Provider, ProviderConfig } from '../common/interfaces'; +import { SERVICES } from '../common/constants'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; +import { CrawlingProvider } from './crawlingProvider'; -function getProvider(provider: string): S3Provider | NFSProvider { +const PROVIDER_CONFIG = Symbol('ProviderConfig'); +function getProvider(provider: string, container: DependencyContainer): Provider { + const childContainer = container.createChildContainer(); + childContainer.register(PROVIDER_CONFIG, { useValue: provider }); switch (provider.toLowerCase()) { case 'nfs': - return container.resolve(NFSProvider); + return childContainer.resolve(NFSProvider); case 's3': - return container.resolve(S3Provider); + return childContainer.resolve(S3Provider); + case 'crawling': { + const underlying = childContainer.resolve(SERVICES.PROVIDER_CONFIG).underlying!; + childContainer.register(SERVICES.UNDERLYING, { + useFactory: (childContainer) => getProvider(underlying, childContainer), + }); + return childContainer.resolve(CrawlingProvider); + } default: - throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Invalid config provider received: ${provider} - available values: "nfs" or "s3"`, false); + throw new AppError( + httpStatus.INTERNAL_SERVER_ERROR, + `Invalid config provider received: ${provider}. Consult documentation for available values`, + false + ); } } -function getProviderConfig(provider: string): ProviderConfig { +function getProviderConfig(container: string | DependencyContainer): ProviderConfig { + const provider = typeof container == 'string' ? container : container.resolve(PROVIDER_CONFIG); try { return config.get(provider); } catch (err) { - throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Invalid config provider received: ${provider} - available values: "nfs" or "s3"`, false); + throw new AppError( + httpStatus.INTERNAL_SERVER_ERROR, + `Invalid config provider received: ${provider}. Consult documentation for available values`, + false + ); } } diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index a871e1e..bf8044c 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -9,6 +9,7 @@ import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; import { Provider, NFSConfig, LogContext } from '../common/interfaces'; +// ToDo: merge this class with the identical class in file-syncer @injectable() export class NFSProvider implements Provider { private readonly logContext: LogContext; @@ -25,6 +26,24 @@ export class NFSProvider implements Provider { }; } + @withSpanAsyncV4 + public async getFile(filePath: string): Promise { + const logContext = { ...this.logContext, function: this.getFile.name }; + const pvPath = this.config.pvPath; + const fullPath = `${pvPath}/${filePath}`; + this.logger.debug({ + msg: 'Starting getFile', + logContext, + fullPath, + }); + const data = await fs.readFile(fullPath); + this.logger.debug({ + msg: 'Done getFile', + logContext, + }); + return data; + } + @withSpanAsyncV4 public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index d4162d6..ea24e58 100644 --- a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -1,4 +1,13 @@ -import { CommonPrefix, ListObjectsCommand, ListObjectsRequest, S3Client, S3ClientConfig, S3ServiceException, _Object } from '@aws-sdk/client-s3'; +import { + CommonPrefix, + ListObjectsCommand, + GetObjectCommand, + ListObjectsRequest, + S3Client, + S3ClientConfig, + S3ServiceException, + _Object, +} from '@aws-sdk/client-s3'; import { Logger } from '@map-colonies/js-logger'; import httpStatus from 'http-status-codes'; import { inject, injectable } from 'tsyringe'; @@ -9,6 +18,7 @@ import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; import { LogContext, Provider, S3Config } from '../common/interfaces'; +// ToDo: merge this class with the identical class in file-syncer @injectable() export class S3Provider implements Provider { private readonly s3: S3Client; @@ -40,6 +50,39 @@ export class S3Provider implements Provider { }; } + @withSpanAsyncV4 + public async getFile(filePath: string): Promise { + const logContext = { ...this.logContext, function: this.getFile.name }; + this.logger.debug({ + msg: 'Starting to get file', + logContext, + filePath, + }); + + const getObjectCommand = new GetObjectCommand({ + /* eslint-disable @typescript-eslint/naming-convention */ + Bucket: this.s3Config.bucket, + Key: filePath, + /* eslint-disable @typescript-eslint/naming-convention */ + }); + + try { + const response = await this.s3.send(getObjectCommand); + const responseArray = await response.Body?.transformToByteArray(); + return Buffer.from(responseArray as Uint8Array); + } catch (err) { + this.logger.error({ + msg: 'an error occurred during getting file', + err, + endpoint: this.s3Config.endpointUrl, + bucketName: this.s3Config.bucket, + key: filePath, + }); + const s3Error = err as Error; + throw new Error(`an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`); + } + } + @withSpanAsyncV4 public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; diff --git a/tests/helpers/mockCreator.ts b/tests/helpers/mockCreator.ts index cdfbceb..b442201 100644 --- a/tests/helpers/mockCreator.ts +++ b/tests/helpers/mockCreator.ts @@ -146,4 +146,5 @@ export const jobManagerClientMock = { export const configProviderMock = { streamModelPathsToQueueFile: jest.fn(), + getFile: jest.fn(), }; diff --git a/tests/helpers/s3Helper.ts b/tests/helpers/s3Helper.ts index 45479ca..6ab9cae 100644 --- a/tests/helpers/s3Helper.ts +++ b/tests/helpers/s3Helper.ts @@ -51,14 +51,16 @@ export class S3Helper { await this.s3.send(command); } - public async createFileOfModel(model: string, file: string): Promise { + public async createFileOfModel(model: string, file: string): Promise { + const data = Buffer.from(faker.word.words()); const params: PutObjectCommandInput = { Bucket: this.s3Config.bucket, Key: `${model}/${file}`, - Body: Buffer.from(faker.word.words()), + Body: data, }; const command = new PutObjectCommand(params); await this.s3.send(command); + return data; } public async clearBucket(bucket = this.s3Config.bucket): Promise { diff --git a/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts b/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts index 219b7c7..14d7190 100644 --- a/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts +++ b/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts @@ -28,7 +28,7 @@ describe('JobOperationsController on S3', function () { token: SERVICES.PROVIDER, provider: { useFactory: (): Provider => { - return getProvider('s3'); + return getProvider('S3', container); }, }, }, @@ -189,7 +189,7 @@ describe('IngestionController on NFS', function () { token: SERVICES.PROVIDER, provider: { useFactory: (): Provider => { - return getProvider('nfs'); + return getProvider('NFS', container); }, }, }, diff --git a/tests/integration/providers/crawlingProvider.spec.ts b/tests/integration/providers/crawlingProvider.spec.ts new file mode 100644 index 0000000..898f02e --- /dev/null +++ b/tests/integration/providers/crawlingProvider.spec.ts @@ -0,0 +1,137 @@ +import fs from 'fs'; +import os from 'os'; +import jsLogger, { Logger } from '@map-colonies/js-logger'; +import { container } from 'tsyringe'; +import { faker } from '@faker-js/faker'; +import { StatusCodes } from 'http-status-codes'; +import { Tracer } from '@opentelemetry/api'; +import { getApp } from '../../../src/app'; +import { SERVICES } from '../../../src/common/constants'; +import { CrawlingConfig } from '../../../src/common/interfaces'; +import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; +import { CrawlingProvider } from '../../../src/providers/crawlingProvider'; +import { configProviderMock } from '../../helpers/mockCreator'; +import { AppError } from '../../../src/common/appError'; + +// ToDo those are UNIT tests, NOT INTEGRATION!! But CI requires integration coverage +describe('CrawlingProvider tests', () => { + let provider: CrawlingProvider; + let queueFileHandler: QueueFileHandler; + const logger: Logger = jsLogger({ enabled: false }); + + const underlying = configProviderMock; + const queueFilePath = os.tmpdir(); + const config: CrawlingConfig = { + extension: '.json', + nestedJsonPath: '$.root..uri', + ignoreNotFound: false, + }; + + beforeAll(() => { + getApp({ + override: [ + { token: SERVICES.LOGGER, provider: { useValue: logger } }, + { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: config } }, + { token: SERVICES.UNDERLYING, provider: { useValue: underlying } }, + ], + }); + provider = container.resolve(CrawlingProvider); + queueFileHandler = container.resolve(QueueFileHandler); + }); + + afterAll(function () { + container.reset(); + }); + + afterEach(() => { + jest.clearAllMocks(); + }); + + describe('constructor', () => { + it('is a stupid test just because coverage fails CI', () => { + const tracer = container.resolve(SERVICES.TRACER); + const provider = new CrawlingProvider(logger, tracer, config, underlying, queueFileHandler); + expect(() => new CrawlingProvider(logger, tracer, config, provider, queueFileHandler)).toThrow(AppError); + }); + }); + + describe('getFile', () => { + it('should delegate', async () => { + const filePath = 'A test??'; + const buffetPromise = Promise.resolve(Buffer.from([80, 101, 114, 114, 121, 32, 116, 104, 101, 32, 116, 101, 115, 116, 63, 33, 63, 33])); + underlying.getFile.mockResolvedValueOnce(buffetPromise); + const file = await provider.getFile(filePath); + expect(underlying.getFile).toHaveBeenCalledWith(filePath); + expect(file.toString()).toBe('Perry the test?!?!'); + }); + }); + + describe('streamModelPathsToQueueFile', () => { + const json0 = { + root: { + content: { uri: 'a.b3dm', boundingVolume: { region: [0] } }, + children: [ + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: 'b.b3dm' } }, + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: '../1.json' }, children: [] }, + ], + }, + }; + const json1 = { root: { content: { uri: 'bla/c.b3dm' }, children: [{ content: { uri: '2.json' } }] } }; + const json2 = {}; + const pathToTileset = '/x/y/0.json'; + + it('should returns all the files from S3', async () => { + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + underlying.getFile + .mockImplementationOnce((path) => { + return path === pathToTileset && Buffer.from(JSON.stringify(json0), 'utf8'); + }) + .mockImplementationOnce((path) => { + return path === '/x/1.json' && Buffer.from(JSON.stringify(json1), 'utf8'); + }) + .mockImplementationOnce((path) => { + return path === '/x/2.json' && Buffer.from(JSON.stringify(json2), 'utf8'); + }); + + const expected: string[] = ['/x/y/0.json', '/x/1.json', '/x/2.json', '/x/y/a.b3dm', '/x/y/b.b3dm', '/x/bla/c.b3dm']; + await queueFileHandler.createQueueFile(modelId); + + await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trimEnd().split('\n'); + + expect(result.sort().join('\n')).toBe(expected.sort().join('\n')); + await queueFileHandler.deleteQueueFile(modelId); + }); + + it('should respect 404 ignore rules error on underlying.getFile error', async () => { + const configWithIgnoreNotFound = { ...config, ignoreNotFound: true }; + const provider = new CrawlingProvider(logger, container.resolve(SERVICES.TRACER), configWithIgnoreNotFound, underlying, queueFileHandler); + underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).resolves.not.toThrow(); + }); + + it('should throw error on underlying.getFile error', async () => { + underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + }); + + it('should throw error bad file', async () => { + underlying.getFile.mockReturnValueOnce(Buffer.from('}{', 'utf8')); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + }); + }); +}); diff --git a/tests/integration/providers/getProvider.spec.ts b/tests/integration/providers/getProvider.spec.ts index f4fcebc..d0a6f0b 100644 --- a/tests/integration/providers/getProvider.spec.ts +++ b/tests/integration/providers/getProvider.spec.ts @@ -1,4 +1,5 @@ import config from 'config'; +import { container } from 'tsyringe'; import { AppError } from '../../../src/common/appError'; import { NFSConfig, S3Config } from '../../../src/common/interfaces'; import { getProvider, getProviderConfig } from '../../../src/providers/getProvider'; @@ -35,7 +36,7 @@ describe('getProvider tests', () => { it('should throw an error when the provider is nor S3 or NFS', () => { const provider = 'bla'; - const response = () => getProvider(provider); + const response = () => getProvider(provider, container); expect(response).toThrow(AppError); }); diff --git a/tests/integration/providers/nfsProvider.spec.ts b/tests/integration/providers/nfsProvider.spec.ts index 8362b50..8c54227 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ -44,6 +44,19 @@ describe('NFSProvider tests', () => { jest.clearAllMocks(); }); + describe('getFile', () => { + it('When calling getFile, should get the file content from pv path', async () => { + const model = faker.word.sample(); + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + const fileContent = await nfsHelper.createFileOfModel(model, file); + + const bufferResult = await provider.getFile(`${model}/${file}`); + const result = bufferResult.toString(); + + expect(result).toStrictEqual(fileContent); + }); + }); + describe('streamModelPathsToQueueFile Function', () => { it('if model exists in the agreed folder, returns all the file paths of the model', async () => { const modelId = faker.string.uuid(); diff --git a/tests/integration/providers/s3Provider.spec.ts b/tests/integration/providers/s3Provider.spec.ts index a42a293..16e2755 100644 --- a/tests/integration/providers/s3Provider.spec.ts +++ b/tests/integration/providers/s3Provider.spec.ts @@ -44,6 +44,28 @@ describe('S3Provider tests', () => { s3Helper.killS3(); }); + describe('getFile', () => { + it(`When calling getFile, should see the file content from source bucket`, async () => { + const model = faker.word.sample(); + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + const expected = await s3Helper.createFileOfModel(model, file); + + const result = await provider.getFile(`${model}/${file}`); + + expect(result).toStrictEqual(expected); + }); + + it(`When the file is not exists in the bucket, throws error`, async () => { + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + + const result = async () => { + await provider.getFile(file); + }; + + await expect(result).rejects.toThrow(Error); + }); + }); + describe('streamModelPathsToQueueFile', () => { it('returns all the files from S3', async () => { const modelId = faker.word.sample(); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts new file mode 100644 index 0000000..35f3600 --- /dev/null +++ b/tests/unit/providers/getProvider.spec.ts @@ -0,0 +1,41 @@ +import config from 'config'; +import jsLogger from '@map-colonies/js-logger'; +import { container } from 'tsyringe'; +import { getProvider } from '../../../src/providers/getProvider'; +import { CrawlingProvider } from '../../../src/providers/crawlingProvider'; +import { NFSProvider } from '../../../src/providers/nfsProvider'; +import { getApp } from '../../../src/app'; +import { SERVICES } from '../../../src/common/constants'; + +describe('getProvider tests', () => { + beforeAll(() => { + getApp({ + override: [ + { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, + { token: SERVICES.PROVIDER, provider: { useFactory: (container) => getProvider('crawling', container) } }, + ], + }); + }); + + afterAll(function () { + container.reset(); + }); + + afterEach(() => { + jest.clearAllMocks(); + }); + + it('should recursively load provider', () => { + const provider = getProvider('crawling', container); + expect(provider).toBeInstanceOf(CrawlingProvider); + const crawlingProviderInstance = provider as CrawlingProvider; + // @ts-expect-error Accessing protected member + expect(crawlingProviderInstance.config).toEqual(config.get('crawling')); + // @ts-expect-error Accessing protected member + expect(crawlingProviderInstance.underlying).toBeInstanceOf(NFSProvider); + // @ts-expect-error Accessing protected member + const underlying = crawlingProviderInstance.underlying as NFSProvider; + // @ts-expect-error Accessing protected member + expect(underlying.config).toEqual(config.get('NFS')); + }); +});