diff --git a/packages/contentstack-import/src/config/index.ts b/packages/contentstack-import/src/config/index.ts index a18d7d077..bb6e6a6e7 100644 --- a/packages/contentstack-import/src/config/index.ts +++ b/packages/contentstack-import/src/config/index.ts @@ -93,9 +93,9 @@ const config: DefaultConfig = { assetBatchLimit: 1, fileName: 'assets.json', importSameStructure: true, - uploadAssetsConcurrency: 2, + uploadAssetsConcurrency: 2, // Keeping original concurrency setting displayExecutionTime: false, - importFoldersConcurrency: 1, + importFoldersConcurrency: 1, // Keeping original concurrency setting includeVersionedAssets: false, host: 'https://api.contentstack.io', folderValidKeys: ['name', 'parent_uid'], diff --git a/packages/contentstack-import/src/import/modules/assets.ts b/packages/contentstack-import/src/import/modules/assets.ts index 007f21875..debc2b568 100644 --- a/packages/contentstack-import/src/import/modules/assets.ts +++ b/packages/contentstack-import/src/import/modules/assets.ts @@ -18,7 +18,7 @@ import { PATH_CONSTANTS } from '../../constants'; import config from '../../config'; import { ModuleClassParams } from '../../types'; -import { formatDate, PROCESS_NAMES, MODULE_CONTEXTS, MODULE_NAMES, PROCESS_STATUS } from '../../utils'; +import { formatDate, PROCESS_NAMES, MODULE_CONTEXTS, MODULE_NAMES, PROCESS_STATUS, MemoryUtils } from '../../utils'; import BaseClass, { ApiOptions } from './base-class'; export default class ImportAssets extends BaseClass { @@ -241,6 +241,29 @@ export default class ImportAssets extends BaseClass { this.progressManager?.tick(true, `asset: ${title || uid}`, null, progressProcessName); log.debug(`Created asset: ${title} (Mapped ${uid} → ${response.uid})`, this.importConfig.context); log.success(`Created asset: '${title}'`, this.importConfig.context); + + // Periodic mapping cleanup every 1000 assets to prevent memory accumulation + const totalMappings = Object.keys(this.assetsUidMap).length; + if (MemoryUtils.shouldCleanup(totalMappings, 1000)) { + log.debug(`Performing periodic cleanup at ${totalMappings} assets`, this.importConfig.context); + + // Write current mappings to disk + if (!isEmpty(this.assetsUidMap)) { + this.fs.writeFile(this.assetUidMapperPath, this.assetsUidMap); + } + if (!isEmpty(this.assetsUrlMap)) { + this.fs.writeFile(this.assetUrlMapperPath, this.assetsUrlMap); + } + + // Clear in-memory maps to free memory + this.assetsUidMap = {}; + this.assetsUrlMap = {}; + + // Force garbage collection if available + MemoryUtils.forceGarbageCollection(this.importConfig.context); + + MemoryUtils.logMemoryStats(`After cleanup at ${totalMappings} assets`, this.importConfig.context); + } }; const onReject = ({ error, apiData: { title, uid } = undefined }: any) => { @@ -307,20 +330,33 @@ export default class ImportAssets extends BaseClass { undefined, !isVersion, ); + + // Memory cleanup after chunk processing + MemoryUtils.cleanup(chunk, apiContent); + + // Log memory stats periodically + if (+index % 10 === 0) { + MemoryUtils.logMemoryStats(`Processed chunk ${index}/${indexerCount}`, this.importConfig.context); + } } } if (!isVersion) { + // Write any remaining mappings that weren't written during periodic cleanup if (!isEmpty(this.assetsUidMap)) { const uidMappingCount = Object.keys(this.assetsUidMap || {}).length; - log.debug(`Writing ${uidMappingCount} UID mappings`, this.importConfig.context); + log.debug(`Writing final ${uidMappingCount} UID mappings`, this.importConfig.context); this.fs.writeFile(this.assetUidMapperPath, this.assetsUidMap); } if (!isEmpty(this.assetsUrlMap)) { const urlMappingCount = Object.keys(this.assetsUrlMap || {}).length; - log.debug(`Writing ${urlMappingCount} URL mappings`, this.importConfig.context); + log.debug(`Writing final ${urlMappingCount} URL mappings`, this.importConfig.context); this.fs.writeFile(this.assetUrlMapperPath, this.assetsUrlMap); } + + // Final memory cleanup + MemoryUtils.cleanup(this.assetsUidMap, this.assetsUrlMap); + MemoryUtils.logMemoryStats('Import completed', this.importConfig.context); } } diff --git a/packages/contentstack-import/src/utils/backup-handler.ts b/packages/contentstack-import/src/utils/backup-handler.ts index 825c792d1..15acaad48 100755 --- a/packages/contentstack-import/src/utils/backup-handler.ts +++ b/packages/contentstack-import/src/utils/backup-handler.ts @@ -1,5 +1,6 @@ import * as path from 'path'; import { copy } from 'fs-extra'; +import { statSync } from 'node:fs'; import { cliux, sanitizePath, log } from '@contentstack/cli-utilities'; import { fileHelper } from './index'; @@ -53,6 +54,27 @@ export default async function backupHandler(importConfig: ImportConfig): Promise } if (backupDirPath) { + // Check dataset size before backup to prevent memory issues + try { + const stats = statSync(sourceDir); + const sizeGB = stats.size / (1024 * 1024 * 1024); + const sizeThresholdGB = 1; // Skip backup for datasets larger than 1GB + + log.debug(`Source directory size: ${sizeGB.toFixed(2)}GB`, importConfig.context); + + if (sizeGB > sizeThresholdGB) { + const skipMessage = `Large dataset detected (${sizeGB.toFixed(2)}GB > ${sizeThresholdGB}GB threshold). Skipping backup to save memory and prevent OOM errors.`; + log.warn(skipMessage, importConfig.context); + cliux.print(skipMessage, { color: 'yellow' }); + + // Return the source directory as the "backup" directory + log.debug(`Using source directory directly: ${sourceDir}`, importConfig.context); + return sourceDir; + } + } catch (error) { + log.debug(`Could not determine source directory size: ${error}. Proceeding with backup.`, importConfig.context); + } + log.debug(`Starting content copy to backup directory: ${backupDirPath}`); log.info('Copying content to the backup directory...', importConfig.context); diff --git a/packages/contentstack-import/src/utils/index.ts b/packages/contentstack-import/src/utils/index.ts index 49ab1146b..aef41639c 100644 --- a/packages/contentstack-import/src/utils/index.ts +++ b/packages/contentstack-import/src/utils/index.ts @@ -32,4 +32,5 @@ export { } from './entries-helper'; export * from './common-helper'; export { lookUpTaxonomy, lookUpTerms } from './taxonomies-helper'; +export { MemoryUtils, MemoryStats } from './memory-utils'; export { MODULE_CONTEXTS, MODULE_NAMES, PROCESS_NAMES, PROCESS_STATUS } from './constants'; diff --git a/packages/contentstack-import/src/utils/memory-utils.ts b/packages/contentstack-import/src/utils/memory-utils.ts new file mode 100644 index 000000000..b4ebac9b4 --- /dev/null +++ b/packages/contentstack-import/src/utils/memory-utils.ts @@ -0,0 +1,104 @@ +import { log } from '@contentstack/cli-utilities'; + +export interface MemoryStats { + rss: number; + heapTotal: number; + heapUsed: number; + external: number; + arrayBuffers: number; + heapUsedMB: number; + heapTotalMB: number; + rssMB: number; +} + +/** + * Simple memory monitoring utilities for asset import + */ +export class MemoryUtils { + private static lastGC: number = 0; + private static gcCooldownMs: number = 5000; // 5 second cooldown between GC calls + + /** + * Get current memory usage statistics + */ + static getMemoryStats(): MemoryStats { + const usage = process.memoryUsage(); + + return { + rss: usage.rss, + heapTotal: usage.heapTotal, + heapUsed: usage.heapUsed, + external: usage.external, + arrayBuffers: usage.arrayBuffers, + heapUsedMB: Math.round(usage.heapUsed / 1024 / 1024 * 100) / 100, + heapTotalMB: Math.round(usage.heapTotal / 1024 / 1024 * 100) / 100, + rssMB: Math.round(usage.rss / 1024 / 1024 * 100) / 100, + }; + } + + /** + * Check if memory usage exceeds the given threshold + * @param thresholdMB Memory threshold in MB + */ + static checkMemoryPressure(thresholdMB: number = 1024): boolean { + const stats = this.getMemoryStats(); + return stats.heapUsedMB > thresholdMB; + } + + /** + * Force garbage collection if available and cooldown period has passed + */ + static async forceGarbageCollection(context?: Record): Promise { + const now = Date.now(); + + if (now - this.lastGC < this.gcCooldownMs) { + return; // Skip if cooldown period hasn't passed + } + + const beforeStats = this.getMemoryStats(); + + if (global.gc) { + log.debug(`Forcing garbage collection - heap before: ${beforeStats.heapUsedMB}MB`, context); + global.gc(); + + // Small delay to allow GC to complete + await new Promise(resolve => setTimeout(resolve, 100)); + + const afterStats = this.getMemoryStats(); + const freedMB = beforeStats.heapUsedMB - afterStats.heapUsedMB; + + log.debug(`GC completed - heap after: ${afterStats.heapUsedMB}MB, freed: ${freedMB.toFixed(2)}MB`, context); + + this.lastGC = now; + } else { + log.warn('Garbage collection not available. Run with --expose-gc flag for better memory management.', context); + } + } + + /** + * Log memory statistics with a given label + */ + static logMemoryStats(label: string, context?: Record): void { + const stats = this.getMemoryStats(); + log.debug(`${label} - Memory: ${stats.heapUsedMB}MB used / ${stats.heapTotalMB}MB total (RSS: ${stats.rssMB}MB)`, context); + } + + /** + * Perform memory cleanup operations + * @param objects Array of objects to null out + */ + static cleanup(...objects: any[]): void { + for (let i = 0; i < objects.length; i++) { + objects[i] = null; + } + } + + /** + * Check if we should trigger memory cleanup based on count + * @param count Current count + * @param interval Cleanup interval (default 1000) + */ + static shouldCleanup(count: number, interval: number = 1000): boolean { + return count > 0 && count % interval === 0; + } +} \ No newline at end of file diff --git a/packages/contentstack-import/test/integration/memory-optimization.test.ts b/packages/contentstack-import/test/integration/memory-optimization.test.ts new file mode 100644 index 000000000..f716735a9 --- /dev/null +++ b/packages/contentstack-import/test/integration/memory-optimization.test.ts @@ -0,0 +1,141 @@ +import { expect } from 'chai'; +import sinon from 'sinon'; +import { MemoryUtils } from '../../src/utils/memory-utils'; + +describe('Memory Optimization Integration', () => { + let memoryUtilsSpy: sinon.SinonSpy; + let processMemoryUsageStub: sinon.SinonStub; + + beforeEach(() => { + // Mock process.memoryUsage to simulate memory pressure + processMemoryUsageStub = sinon.stub(process, 'memoryUsage').returns({ + rss: 2000 * 1024 * 1024, // 2GB + heapTotal: 1500 * 1024 * 1024, // 1.5GB + heapUsed: 1200 * 1024 * 1024, // 1.2GB (above 1GB threshold) + external: 100 * 1024 * 1024, + arrayBuffers: 50 * 1024 * 1024, + }); + + memoryUtilsSpy = sinon.spy(MemoryUtils); + }); + + afterEach(() => { + processMemoryUsageStub.restore(); + sinon.restore(); + }); + + describe('Memory Pressure Detection', () => { + it('should detect memory pressure with large heap usage', () => { + const isUnderPressure = MemoryUtils.checkMemoryPressure(1024); // 1GB threshold + expect(isUnderPressure).to.be.true; + }); + + it('should not detect memory pressure with normal heap usage', () => { + // Mock lower memory usage + processMemoryUsageStub.returns({ + rss: 500 * 1024 * 1024, + heapTotal: 400 * 1024 * 1024, + heapUsed: 300 * 1024 * 1024, // 300MB < 1GB threshold + external: 50 * 1024 * 1024, + arrayBuffers: 25 * 1024 * 1024, + }); + + const isUnderPressure = MemoryUtils.checkMemoryPressure(1024); + expect(isUnderPressure).to.be.false; + }); + }); + + describe('Periodic Cleanup Logic', () => { + it('should trigger cleanup at correct intervals', () => { + // Test cleanup intervals + expect(MemoryUtils.shouldCleanup(1000, 1000)).to.be.true; + expect(MemoryUtils.shouldCleanup(2000, 1000)).to.be.true; + expect(MemoryUtils.shouldCleanup(999, 1000)).to.be.false; + expect(MemoryUtils.shouldCleanup(1001, 1000)).to.be.false; + }); + + it('should use default interval of 1000', () => { + expect(MemoryUtils.shouldCleanup(1000)).to.be.true; + expect(MemoryUtils.shouldCleanup(2000)).to.be.true; + expect(MemoryUtils.shouldCleanup(3000)).to.be.true; + }); + }); + + describe('Memory Statistics', () => { + it('should provide accurate memory statistics', () => { + const stats = MemoryUtils.getMemoryStats(); + + expect(stats.heapUsedMB).to.equal(1200); // 1.2GB in MB + expect(stats.heapTotalMB).to.equal(1500); // 1.5GB in MB + expect(stats.rssMB).to.equal(2000); // 2GB in MB + + expect(stats.heapUsed).to.equal(1200 * 1024 * 1024); + expect(stats.heapTotal).to.equal(1500 * 1024 * 1024); + expect(stats.rss).to.equal(2000 * 1024 * 1024); + }); + }); + + describe('Garbage Collection', () => { + it('should handle garbage collection gracefully when not available', async () => { + // Ensure global.gc is not available + delete (global as any).gc; + + // Should not throw an error + await MemoryUtils.forceGarbageCollection(); + }); + + it('should call garbage collection when available', async () => { + const mockGc = sinon.stub(); + (global as any).gc = mockGc; + + await MemoryUtils.forceGarbageCollection(); + + expect(mockGc.calledOnce).to.be.true; + + delete (global as any).gc; + }); + }); + + describe('Memory Cleanup Simulation', () => { + it('should simulate asset processing memory cleanup', () => { + // Simulate processing 5000 assets + let memoryCleanupCount = 0; + + for (let i = 1; i <= 5000; i++) { + if (MemoryUtils.shouldCleanup(i, 1000)) { + memoryCleanupCount++; + } + } + + // Should trigger cleanup 5 times (at 1000, 2000, 3000, 4000, 5000) + expect(memoryCleanupCount).to.equal(5); + }); + + it('should demonstrate memory pressure detection throughout processing', () => { + const memoryReadings = []; + + // Simulate increasing memory usage + for (let i = 0; i < 5; i++) { + const memoryUsageMB = 500 + (i * 200); // 500MB, 700MB, 900MB, 1100MB, 1300MB + + processMemoryUsageStub.returns({ + rss: memoryUsageMB * 1024 * 1024, + heapTotal: (memoryUsageMB - 100) * 1024 * 1024, + heapUsed: (memoryUsageMB - 200) * 1024 * 1024, + external: 50 * 1024 * 1024, + arrayBuffers: 25 * 1024 * 1024, + }); + + const isUnderPressure = MemoryUtils.checkMemoryPressure(1024); // 1GB threshold + memoryReadings.push({ memoryUsageMB: memoryUsageMB - 200, isUnderPressure }); + } + + // Should detect pressure when memory exceeds 1GB (1024MB) + expect(memoryReadings[0].isUnderPressure).to.be.false; // 300MB + expect(memoryReadings[1].isUnderPressure).to.be.false; // 500MB + expect(memoryReadings[2].isUnderPressure).to.be.false; // 700MB + expect(memoryReadings[3].isUnderPressure).to.be.true; // 900MB (close to threshold) + expect(memoryReadings[4].isUnderPressure).to.be.true; // 1100MB (over threshold) + }); + }); +}); \ No newline at end of file