From a57546d84f61dd432c574d2113fc3d453bc06703 Mon Sep 17 00:00:00 2001 From: Illia Antypenko Date: Wed, 5 Nov 2025 22:11:10 +0100 Subject: [PATCH 1/3] Drop usage of fs-extra and switch to native fs --- lib/plugins/save-resource-to-fs-plugin.js | 7 ++++--- lib/utils/fs.js | 13 +++++++++++++ package.json | 2 +- 3 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 lib/utils/fs.js diff --git a/lib/plugins/save-resource-to-fs-plugin.js b/lib/plugins/save-resource-to-fs-plugin.js index b5cfab02..c66b18f5 100644 --- a/lib/plugins/save-resource-to-fs-plugin.js +++ b/lib/plugins/save-resource-to-fs-plugin.js @@ -1,5 +1,6 @@ import path from 'path'; -import fs from 'fs-extra'; +import fs from 'fs'; +import { outputFile } from '../utils/fs.js'; class SaveResourceToFileSystemPlugin { apply (registerAction) { @@ -20,13 +21,13 @@ class SaveResourceToFileSystemPlugin { registerAction('saveResource', async ({resource}) => { const filename = path.join(absoluteDirectoryPath, resource.getFilename()); const text = resource.getText(); - await fs.outputFile(filename, text, { encoding: resource.getEncoding() }); + await outputFile(filename, text, resource.getEncoding()); loadedResources.push(resource); }); registerAction('error', async () => { if (loadedResources.length > 0) { - await fs.remove(absoluteDirectoryPath); + fs.rmSync(absoluteDirectoryPath, {force: true, recursive: true}); } }); } diff --git a/lib/utils/fs.js b/lib/utils/fs.js new file mode 100644 index 00000000..38618024 --- /dev/null +++ b/lib/utils/fs.js @@ -0,0 +1,13 @@ +import path from 'path'; +import fs from 'fs/promises'; + +async function outputFile (file, data, encoding) { + const dir = path.dirname(file); + await fs.mkdir(dir, { recursive: true}); + + return fs.writeFile(file, data, { encoding: encoding }); +} + +export { + outputFile +}; diff --git a/package.json b/package.json index 0392245a..8b90836e 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,6 @@ "css-url-parser": "^1.0.0", "debug": "^4.3.1", "filenamify": "^7.0.0", - "fs-extra": "^11.1.0", "got": "^14.4.7", "normalize-url": "^9.0.0", "p-queue": "^9.0.0", @@ -53,6 +52,7 @@ "c8": "^11.0.0", "chai": "^6.2.0", "eslint": "^10.5.0", + "fs-extra": "^11.1.0", "globals": "^17.7.0", "mocha": "^11.0.1", "nock": "^14.0.0", From 1e42bb312583b051366b7a1b3ee3d2443b7effff Mon Sep 17 00:00:00 2001 From: Illia Antypenko Date: Mon, 22 Jun 2026 23:30:41 +0200 Subject: [PATCH 2/3] Drop fs-extra usage in tests and remove dependency Replace fs-extra with native fs in all test files: - fs.removeSync(dir) -> fs.rmSync(dir, {recursive: true, force: true}) - fs.mkdirpSync(dir) -> fs.mkdirSync(dir, {recursive: true}) - fs.emptyDirSync(dir) -> fs.rmSync + fs.mkdirSync Remove fs-extra from devDependencies as it is no longer used. Co-Authored-By: Claude Opus 4.8 (1M context) --- package.json | 1 - test/e2e/e2e-test.js | 5 +++-- test/functional/base/base.test.js | 4 ++-- test/functional/base/check-it-works.js | 4 ++-- test/functional/binary-resources/images.test.js | 4 ++-- test/functional/callbacks/callbacks.test.js | 4 ++-- .../circular-dependencies/circular-dependencies.test.js | 4 ++-- test/functional/css-handling/css-handling.test.js | 4 ++-- test/functional/data-url/data-url.test.js | 4 ++-- test/functional/error-handling/error-handling.test.js | 4 ++-- test/functional/html-entities/html-entities.test.js | 4 ++-- test/functional/html-id-href/html-id-href.test.js | 4 ++-- test/functional/max-depth/max-depth.test.js | 4 ++-- test/functional/recursive/recursive.test.js | 4 ++-- test/functional/redirect/redirect.test.js | 4 ++-- .../request-concurrency/request-concurrency.test.js | 4 ++-- .../after-response-action.test.js | 4 ++-- .../request-response-customizations/request.test.js | 4 ++-- test/functional/resource-saver/resource-saver.test.js | 4 ++-- .../resource-without-ext/resource-without-ext.test.js | 4 ++-- .../update-missing-sources/update-missing-sources.test.js | 4 ++-- test/unit/scraper-test.js | 6 +++--- 22 files changed, 44 insertions(+), 44 deletions(-) diff --git a/package.json b/package.json index 8b90836e..3aa4c5e9 100644 --- a/package.json +++ b/package.json @@ -52,7 +52,6 @@ "c8": "^11.0.0", "chai": "^6.2.0", "eslint": "^10.5.0", - "fs-extra": "^11.1.0", "globals": "^17.7.0", "mocha": "^11.0.1", "nock": "^14.0.0", diff --git a/test/e2e/e2e-test.js b/test/e2e/e2e-test.js index 391ed754..1f39abc0 100644 --- a/test/e2e/e2e-test.js +++ b/test/e2e/e2e-test.js @@ -1,5 +1,5 @@ import scrape from 'website-scraper'; -import fs from 'fs-extra'; +import fs from 'fs'; import * as chai from 'chai'; import { readFile } from 'fs/promises'; @@ -11,7 +11,8 @@ chai.should(); describe('E2E', function() { before(function() { - fs.emptyDirSync(resultDirname); + fs.rmSync(resultDirname, {recursive: true, force: true}); + fs.mkdirSync(resultDirname, {recursive: true}); }); after(function() { diff --git a/test/functional/base/base.test.js b/test/functional/base/base.test.js index c59f3bf1..79cca903 100644 --- a/test/functional/base/base.test.js +++ b/test/functional/base/base.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import * as cheerio from 'cheerio'; import scrape from 'website-scraper'; import Resource from '../../../lib/resource.js'; @@ -42,7 +42,7 @@ describe('Functional: base', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); beforeEach(() => { diff --git a/test/functional/base/check-it-works.js b/test/functional/base/check-it-works.js index 1a7827a8..758b8af8 100644 --- a/test/functional/base/check-it-works.js +++ b/test/functional/base/check-it-works.js @@ -2,7 +2,7 @@ import * as chai from 'chai'; chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/base/.tmp2'; @@ -17,7 +17,7 @@ describe('Functional: check it works', function() { afterEach(function () { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should work with promise', () => { diff --git a/test/functional/binary-resources/images.test.js b/test/functional/binary-resources/images.test.js index e21c10be..4f6c4ed7 100644 --- a/test/functional/binary-resources/images.test.js +++ b/test/functional/binary-resources/images.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import * as cheerio from 'cheerio'; import scrape from 'website-scraper'; @@ -31,7 +31,7 @@ describe('Functional: images', () => { afterEach(() => { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); beforeEach(() => { diff --git a/test/functional/callbacks/callbacks.test.js b/test/functional/callbacks/callbacks.test.js index 041cd538..14a1f5fe 100644 --- a/test/functional/callbacks/callbacks.test.js +++ b/test/functional/callbacks/callbacks.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import sinon from 'sinon'; import scrape from 'website-scraper'; @@ -19,7 +19,7 @@ describe('Functional: onResourceSaved and onResourceError callbacks in plugin', afterEach(() => { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should call onResourceSaved callback and onResourceError callback if ignoreErrors = true', function() { diff --git a/test/functional/circular-dependencies/circular-dependencies.test.js b/test/functional/circular-dependencies/circular-dependencies.test.js index 0896a540..133ec5bd 100644 --- a/test/functional/circular-dependencies/circular-dependencies.test.js +++ b/test/functional/circular-dependencies/circular-dependencies.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/circular-dependencies/.tmp'; @@ -19,7 +19,7 @@ describe('Functional circular dependencies', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should correctly load files with circular dependency', function() { diff --git a/test/functional/css-handling/css-handling.test.js b/test/functional/css-handling/css-handling.test.js index 6277e9a9..1f39d949 100644 --- a/test/functional/css-handling/css-handling.test.js +++ b/test/functional/css-handling/css-handling.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/css-handling/.tmp'; @@ -19,7 +19,7 @@ describe('Functional: css handling', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should correctly handle css files, style tags and style attributes and ignore css-like text inside common html tags', function() { diff --git a/test/functional/data-url/data-url.test.js b/test/functional/data-url/data-url.test.js index fdfa94bf..ee9451cb 100644 --- a/test/functional/data-url/data-url.test.js +++ b/test/functional/data-url/data-url.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/data-url/.tmp'; @@ -19,7 +19,7 @@ describe('Functional: data urls handling', function () { afterEach(function () { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should correctly handle html files with data urls in attributes', function () { diff --git a/test/functional/error-handling/error-handling.test.js b/test/functional/error-handling/error-handling.test.js index e9a8e993..5cd588b6 100644 --- a/test/functional/error-handling/error-handling.test.js +++ b/test/functional/error-handling/error-handling.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import sinon from 'sinon'; import scrape from 'website-scraper'; import Scraper from '../../../lib/scraper.js'; @@ -37,7 +37,7 @@ describe('Functional error handling', function() { afterEach(function () { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); describe('FS Error', function () { diff --git a/test/functional/html-entities/html-entities.test.js b/test/functional/html-entities/html-entities.test.js index 18b99e6b..4eccd84a 100644 --- a/test/functional/html-entities/html-entities.test.js +++ b/test/functional/html-entities/html-entities.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/html-entities/.tmp'; @@ -19,7 +19,7 @@ describe('Functional: html entities', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should decode all html-entities found in html files and not encode entities from css file', function() { diff --git a/test/functional/html-id-href/html-id-href.test.js b/test/functional/html-id-href/html-id-href.test.js index 85b9d33d..bf339158 100644 --- a/test/functional/html-id-href/html-id-href.test.js +++ b/test/functional/html-id-href/html-id-href.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/html-id-href/.tmp'; @@ -19,7 +19,7 @@ describe('Functional html id href', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should ignore same-file paths and update other-file paths', function() { diff --git a/test/functional/max-depth/max-depth.test.js b/test/functional/max-depth/max-depth.test.js index 8b30e8e9..0a13c95c 100644 --- a/test/functional/max-depth/max-depth.test.js +++ b/test/functional/max-depth/max-depth.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/max-depth/.tmp'; @@ -19,7 +19,7 @@ describe('Functional: maxDepth and maxRecursiveDepth ', () => { afterEach(() => { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should filter out all resources by depth > maxDepth', () => { diff --git a/test/functional/recursive/recursive.test.js b/test/functional/recursive/recursive.test.js index b3eeedbe..f7a9720f 100644 --- a/test/functional/recursive/recursive.test.js +++ b/test/functional/recursive/recursive.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/recursive/.tmp'; @@ -19,7 +19,7 @@ describe('Functional recursive downloading', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should follow anchors if recursive flag is set', function () { diff --git a/test/functional/redirect/redirect.test.js b/test/functional/redirect/redirect.test.js index 4aef79cc..a0ab0446 100644 --- a/test/functional/redirect/redirect.test.js +++ b/test/functional/redirect/redirect.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import sinon from 'sinon'; import scrape from 'website-scraper'; import Scraper from '../../../lib/scraper.js'; @@ -21,7 +21,7 @@ describe('Functional redirects', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should follow redirects and save resource once if it has different urls', function() { diff --git a/test/functional/request-concurrency/request-concurrency.test.js b/test/functional/request-concurrency/request-concurrency.test.js index 7f0ff0dd..e09f6736 100644 --- a/test/functional/request-concurrency/request-concurrency.test.js +++ b/test/functional/request-concurrency/request-concurrency.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/request-concurrency/.tmp'; @@ -59,7 +59,7 @@ describe('Functional concurrent requests', function() { afterEach(function () { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should have maximum concurrent requests == requestConcurrency option', () => { diff --git a/test/functional/request-response-customizations/after-response-action.test.js b/test/functional/request-response-customizations/after-response-action.test.js index 2aeb4511..55a33cb5 100644 --- a/test/functional/request-response-customizations/after-response-action.test.js +++ b/test/functional/request-response-customizations/after-response-action.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/req-res-customizations-after-response/.tmp'; @@ -19,7 +19,7 @@ describe('Functional: afterResponse action in plugin', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should skip downloading resource if afterResponse returns null', function() { diff --git a/test/functional/request-response-customizations/request.test.js b/test/functional/request-response-customizations/request.test.js index 1bee9dc1..05687430 100644 --- a/test/functional/request-response-customizations/request.test.js +++ b/test/functional/request-response-customizations/request.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/req-res-customizations-request/.tmp'; @@ -19,7 +19,7 @@ describe('Functional: customize request options with plugin', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should use options from request property if no beforeRequest actions', function() { diff --git a/test/functional/resource-saver/resource-saver.test.js b/test/functional/resource-saver/resource-saver.test.js index 3ef6fe0b..a01a6d9b 100644 --- a/test/functional/resource-saver/resource-saver.test.js +++ b/test/functional/resource-saver/resource-saver.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import sinon from 'sinon'; import scrape from 'website-scraper'; @@ -20,7 +20,7 @@ describe('Functional: plugin for saving resources', () => { afterEach(() => { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); let saveResourceStub, handleErrorStub, saveResourcePlugin; diff --git a/test/functional/resource-without-ext/resource-without-ext.test.js b/test/functional/resource-without-ext/resource-without-ext.test.js index afc07466..97e66088 100644 --- a/test/functional/resource-without-ext/resource-without-ext.test.js +++ b/test/functional/resource-without-ext/resource-without-ext.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/resource-without-ext/.tmp'; @@ -19,7 +19,7 @@ describe('Functional resources without extensions', function() { afterEach(function() { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should load resources without extensions with correct type and wrap with extensions', function () { diff --git a/test/functional/update-missing-sources/update-missing-sources.test.js b/test/functional/update-missing-sources/update-missing-sources.test.js index 69e5d64a..c10ae2cf 100644 --- a/test/functional/update-missing-sources/update-missing-sources.test.js +++ b/test/functional/update-missing-sources/update-missing-sources.test.js @@ -3,7 +3,7 @@ chai.should(); import '../../utils/assertions.js'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import scrape from 'website-scraper'; const testDirname = './test/functional/update-missing-sources/.tmp'; @@ -37,7 +37,7 @@ describe('Functional: update missing sources', () => { afterEach(() => { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); it('should not update missing sources by default', () => { diff --git a/test/unit/scraper-test.js b/test/unit/scraper-test.js index 68f67a24..2b9477b0 100644 --- a/test/unit/scraper-test.js +++ b/test/unit/scraper-test.js @@ -2,7 +2,7 @@ import * as chai from 'chai'; chai.should(); import sinon from 'sinon'; import nock from 'nock'; -import fs from 'fs-extra'; +import fs from 'fs'; import path from 'path'; import Scraper from '../../lib/scraper.js'; import Resource from '../../lib/resource.js'; @@ -23,7 +23,7 @@ describe('Scraper', () => { afterEach(() => { nock.cleanAll(); nock.enableNetConnect(); - fs.removeSync(testDirname); + fs.rmSync(testDirname, {recursive: true, force: true}); }); describe('#loadResource', () => { @@ -542,7 +542,7 @@ describe('Scraper', () => { it('should return error if existing directory passed', async () => { try { - fs.mkdirpSync(testDirname); + fs.mkdirSync(testDirname, {recursive: true}); const s = new Scraper({ urls: 'http://example.com', directory: testDirname From 3cc87fc4bc446407ee0be707dcf7df86504286e4 Mon Sep 17 00:00:00 2001 From: Illia Antypenko Date: Mon, 22 Jun 2026 23:47:19 +0200 Subject: [PATCH 3/3] Use async fs.promises.rm for error cleanup instead of sync rmSync Keeps the error-action cleanup non-blocking, matching the original awaited fs.remove behavior and the async outputFile write path. The error action runs at most once (single catch in Scraper.scrape), so the async call carries no concurrency risk. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/plugins/save-resource-to-fs-plugin.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/plugins/save-resource-to-fs-plugin.js b/lib/plugins/save-resource-to-fs-plugin.js index c66b18f5..19b2d1d4 100644 --- a/lib/plugins/save-resource-to-fs-plugin.js +++ b/lib/plugins/save-resource-to-fs-plugin.js @@ -27,7 +27,7 @@ class SaveResourceToFileSystemPlugin { registerAction('error', async () => { if (loadedResources.length > 0) { - fs.rmSync(absoluteDirectoryPath, {force: true, recursive: true}); + await fs.promises.rm(absoluteDirectoryPath, {force: true, recursive: true}); } }); }