| 'use strict' |
| |
| /** |
| * Copyright Brian White. All rights reserved. |
| * |
| * @see https://github.com/mscdex/streamsearch |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to |
| * deal in the Software without restriction, including without limitation the |
| * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation |
| * by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool |
| */ |
| const EventEmitter = require('node:events').EventEmitter |
| const inherits = require('node:util').inherits |
| |
| function SBMH (needle) { |
| if (typeof needle === 'string') { |
| needle = Buffer.from(needle) |
| } |
| |
| if (!Buffer.isBuffer(needle)) { |
| throw new TypeError('The needle has to be a String or a Buffer.') |
| } |
| |
| const needleLength = needle.length |
| |
| if (needleLength === 0) { |
| throw new Error('The needle cannot be an empty String/Buffer.') |
| } |
| |
| if (needleLength > 256) { |
| throw new Error('The needle cannot have a length bigger than 256.') |
| } |
| |
| this.maxMatches = Infinity |
| this.matches = 0 |
| |
| this._occ = new Array(256) |
| .fill(needleLength) // Initialize occurrence table. |
| this._lookbehind_size = 0 |
| this._needle = needle |
| this._bufpos = 0 |
| |
| this._lookbehind = Buffer.alloc(needleLength) |
| |
| // Populate occurrence table with analysis of the needle, |
| // ignoring last letter. |
| for (var i = 0; i < needleLength - 1; ++i) { // eslint-disable-line no-var |
| this._occ[needle[i]] = needleLength - 1 - i |
| } |
| } |
| inherits(SBMH, EventEmitter) |
| |
| SBMH.prototype.reset = function () { |
| this._lookbehind_size = 0 |
| this.matches = 0 |
| this._bufpos = 0 |
| } |
| |
| SBMH.prototype.push = function (chunk, pos) { |
| if (!Buffer.isBuffer(chunk)) { |
| chunk = Buffer.from(chunk, 'binary') |
| } |
| const chlen = chunk.length |
| this._bufpos = pos || 0 |
| let r |
| while (r !== chlen && this.matches < this.maxMatches) { r = this._sbmh_feed(chunk) } |
| return r |
| } |
| |
| SBMH.prototype._sbmh_feed = function (data) { |
| const len = data.length |
| const needle = this._needle |
| const needleLength = needle.length |
| const lastNeedleChar = needle[needleLength - 1] |
| |
| // Positive: points to a position in `data` |
| // pos == 3 points to data[3] |
| // Negative: points to a position in the lookbehind buffer |
| // pos == -2 points to lookbehind[lookbehind_size - 2] |
| let pos = -this._lookbehind_size |
| let ch |
| |
| if (pos < 0) { |
| // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool |
| // search with character lookup code that considers both the |
| // lookbehind buffer and the current round's haystack data. |
| // |
| // Loop until |
| // there is a match. |
| // or until |
| // we've moved past the position that requires the |
| // lookbehind buffer. In this case we switch to the |
| // optimized loop. |
| // or until |
| // the character to look at lies outside the haystack. |
| while (pos < 0 && pos <= len - needleLength) { |
| ch = this._sbmh_lookup_char(data, pos + needleLength - 1) |
| |
| if ( |
| ch === lastNeedleChar && |
| this._sbmh_memcmp(data, pos, needleLength - 1) |
| ) { |
| this._lookbehind_size = 0 |
| ++this.matches |
| this.emit('info', true) |
| |
| return (this._bufpos = pos + needleLength) |
| } |
| pos += this._occ[ch] |
| } |
| |
| // No match. |
| |
| if (pos < 0) { |
| // There's too few data for Boyer-Moore-Horspool to run, |
| // so let's use a different algorithm to skip as much as |
| // we can. |
| // Forward pos until |
| // the trailing part of lookbehind + data |
| // looks like the beginning of the needle |
| // or until |
| // pos == 0 |
| while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) { ++pos } |
| } |
| |
| if (pos >= 0) { |
| // Discard lookbehind buffer. |
| this.emit('info', false, this._lookbehind, 0, this._lookbehind_size) |
| this._lookbehind_size = 0 |
| } else { |
| // Cut off part of the lookbehind buffer that has |
| // been processed and append the entire haystack |
| // into it. |
| const bytesToCutOff = this._lookbehind_size + pos |
| if (bytesToCutOff > 0) { |
| // The cut off data is guaranteed not to contain the needle. |
| this.emit('info', false, this._lookbehind, 0, bytesToCutOff) |
| } |
| |
| this._lookbehind.copy(this._lookbehind, 0, bytesToCutOff, |
| this._lookbehind_size - bytesToCutOff) |
| this._lookbehind_size -= bytesToCutOff |
| |
| data.copy(this._lookbehind, this._lookbehind_size) |
| this._lookbehind_size += len |
| |
| this._bufpos = len |
| return len |
| } |
| } |
| |
| pos += (pos >= 0) * this._bufpos |
| |
| // Lookbehind buffer is now empty. We only need to check if the |
| // needle is in the haystack. |
| if (data.indexOf(needle, pos) !== -1) { |
| pos = data.indexOf(needle, pos) |
| ++this.matches |
| if (pos > 0) { this.emit('info', true, data, this._bufpos, pos) } else { this.emit('info', true) } |
| |
| return (this._bufpos = pos + needleLength) |
| } else { |
| pos = len - needleLength |
| } |
| |
| // There was no match. If there's trailing haystack data that we cannot |
| // match yet using the Boyer-Moore-Horspool algorithm (because the trailing |
| // data is less than the needle size) then match using a modified |
| // algorithm that starts matching from the beginning instead of the end. |
| // Whatever trailing data is left after running this algorithm is added to |
| // the lookbehind buffer. |
| while ( |
| pos < len && |
| ( |
| data[pos] !== needle[0] || |
| ( |
| (Buffer.compare( |
| data.subarray(pos, pos + len - pos), |
| needle.subarray(0, len - pos) |
| ) !== 0) |
| ) |
| ) |
| ) { |
| ++pos |
| } |
| if (pos < len) { |
| data.copy(this._lookbehind, 0, pos, pos + (len - pos)) |
| this._lookbehind_size = len - pos |
| } |
| |
| // Everything until pos is guaranteed not to contain needle data. |
| if (pos > 0) { this.emit('info', false, data, this._bufpos, pos < len ? pos : len) } |
| |
| this._bufpos = len |
| return len |
| } |
| |
| SBMH.prototype._sbmh_lookup_char = function (data, pos) { |
| return (pos < 0) |
| ? this._lookbehind[this._lookbehind_size + pos] |
| : data[pos] |
| } |
| |
| SBMH.prototype._sbmh_memcmp = function (data, pos, len) { |
| for (var i = 0; i < len; ++i) { // eslint-disable-line no-var |
| if (this._sbmh_lookup_char(data, pos + i) !== this._needle[i]) { return false } |
| } |
| return true |
| } |
| |
| module.exports = SBMH |