From 2b1fa7b8bd858534e8d61bce550ec435e42a2a35 Mon Sep 17 00:00:00 2001 From: Colin Date: Wed, 11 Dec 2024 18:57:16 -0500 Subject: [PATCH] Convert to gf2 --- CHANGELOG.md | 10 ---- README.md | 32 ++++++------- package.json | 5 +- src/twoBitFile.ts | 116 +++++++++++++++------------------------------- yarn.lock | 35 ++------------ 5 files changed, 61 insertions(+), 137 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65bc96d..abbec5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,23 +1,13 @@ ## [3.0.1](https://github.com/GMOD/twobit-js/compare/v2.0.1...v3.0.1) (2024-11-10) - - # [3.0.0](https://github.com/GMOD/twobit-js/compare/v2.0.1...v3.0.0) (2024-11-10) - - # [3.0.0](https://github.com/GMOD/twobit-js/compare/v2.0.1...v3.0.0) (2024-11-10) - - # [3.0.0](https://github.com/GMOD/twobit-js/compare/v2.0.1...v3.0.0) (2024-11-10) - - # [3.0.0](https://github.com/GMOD/twobit-js/compare/v2.0.1...v3.0.0) (2024-11-10) - - ## [2.0.1](https://github.com/GMOD/twobit-js/compare/v2.0.0...v2.0.1) (2024-08-09) # [2.0.0](https://github.com/GMOD/twobit-js/compare/v1.1.14...v2.0.0) (2024-08-09) diff --git a/README.md b/README.md index 427a83f..191df81 100644 --- a/README.md +++ b/README.md @@ -44,24 +44,24 @@ const seqNames = await t.getSequenceNames() ##### Table of Contents -* [constructor](#constructor) - * [Parameters](#parameters) -* [getSequenceNames](#getsequencenames) -* [getSequenceSizes](#getsequencesizes) -* [getSequenceSize](#getsequencesize) - * [Parameters](#parameters-1) -* [getSequence](#getsequence) - * [Parameters](#parameters-2) +- [constructor](#constructor) + - [Parameters](#parameters) +- [getSequenceNames](#getsequencenames) +- [getSequenceSizes](#getsequencesizes) +- [getSequenceSize](#getsequencesize) + - [Parameters](#parameters-1) +- [getSequence](#getsequence) + - [Parameters](#parameters-2) #### constructor ##### Parameters -* `args` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** +- `args` 
**[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** - * `args.filehandle` **Filehandle?** node fs.promises-like filehandle for the .2bit file. - Only needs to support `filehandle.read(buffer, offset, length, position)` - * `args.path` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** filesystem path for the .2bit file to open + - `args.filehandle` **Filehandle?** node fs.promises-like filehandle for the .2bit file. + Only needs to support `filehandle.read(buffer, offset, length, position)` + - `args.path` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** filesystem path for the .2bit file to open #### getSequenceNames @@ -78,7 +78,7 @@ e.g. a chrom.sizes file, it will be much faster ##### Parameters -* `seqName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** name of the sequence +- `seqName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** name of the sequence Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)** for the sequence's length, or undefined if it is not in the file @@ -86,9 +86,9 @@ Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/ ##### Parameters -* `seqName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** name of the sequence you want -* `regionStart` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)?** optional 0-based half-open start of the sequence region to fetch. (optional, default `0`) -* `regionEnd` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)?** optional 0-based half-open end of the sequence region to fetch. 
defaults to end of the sequence (optional, default `Number.POSITIVE_INFINITY`) +- `seqName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** name of the sequence you want +- `regionStart` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)?** optional 0-based half-open start of the sequence region to fetch. (optional, default `0`) +- `regionEnd` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)?** optional 0-based half-open end of the sequence region to fetch. defaults to end of the sequence (optional, default `Number.POSITIVE_INFINITY`) Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)** for a string of sequence bases diff --git a/package.json b/package.json index 83bac3c..bc6cb3d 100644 --- a/package.json +++ b/package.json @@ -39,8 +39,7 @@ "biojs" ], "dependencies": { - "buffer": "^6.0.3", - "generic-filehandle": "^3.0.0" + "generic-filehandle2": "^0.0.1" }, "devDependencies": { "@typescript-eslint/eslint-plugin": "^8.0.1", @@ -50,7 +49,7 @@ "eslint": "^9.7.0", "eslint-plugin-import": "^2.26.0", "eslint-plugin-unicorn": "^56.0.0", - "prettier": "^3.3.3", + "prettier": "^3.4.2", "rimraf": "^6.0.1", "standard-changelog": "^6.0.0", "typescript": "^5.5.3", diff --git a/src/twoBitFile.ts b/src/twoBitFile.ts index d4e4f10..ec727d7 100644 --- a/src/twoBitFile.ts +++ b/src/twoBitFile.ts @@ -1,5 +1,4 @@ -import { LocalFile, GenericFilehandle } from 'generic-filehandle' -import { Buffer } from 'buffer' +import { LocalFile, GenericFilehandle } from 'generic-filehandle2' const TWOBIT_MAGIC = 0x1a412743 @@ -9,9 +8,9 @@ const byteTo4Bases = [] as string[] for (let index = 0; index < 256; index++) { byteTo4Bases.push( twoBit[(index >> 6) & 3] + - twoBit[(index >> 4) & 3] + - twoBit[(index >> 2) & 3] + - twoBit[index & 3], + twoBit[(index >> 4) & 3] + + twoBit[(index >> 2) & 3] + + 
twoBit[index & 3], ) } @@ -46,17 +45,11 @@ export default class TwoBitFile { } async _detectEndianness() { - const returnValue = await this.filehandle.read( - Buffer.allocUnsafe(8), - 0, - 8, - 0, - ) - const { buffer } = returnValue - if (buffer.readInt32LE(0) === TWOBIT_MAGIC) { - this.version = buffer.readInt32LE(4) - } else if (buffer.readInt32BE(0) === TWOBIT_MAGIC) { - throw new Error('big endian not supported') + const buffer = await this.filehandle.read(8, 0) + const dataView = new DataView(buffer.buffer) + const magic = dataView.getInt32(0, true) + if (magic === TWOBIT_MAGIC) { + this.version = dataView.getInt32(4, true) } else { throw new Error('not a 2bit file') } @@ -75,14 +68,7 @@ export default class TwoBitFile { async _getHeader() { await this._detectEndianness() - const { buffer } = await this.filehandle.read( - Buffer.allocUnsafe(16), - 0, - 16, - 0, - ) - - const b = buffer + const b = await this.filehandle.read(16, 0) const le = true const dataView = new DataView(b.buffer, b.byteOffset, b.length) let offset = 0 @@ -119,15 +105,9 @@ export default class TwoBitFile { const header = await this.getHeader() const maxIndexLength = 8 + header.sequenceCount * (1 + 256 + (this.version === 1 ? 
8 : 4)) - const { buffer } = await this.filehandle.read( - Buffer.allocUnsafe(maxIndexLength), - 0, - maxIndexLength, - 8, - ) + const b = await this.filehandle.read(maxIndexLength, 8) const le = true - const b = buffer const dataView = new DataView(b.buffer, b.byteOffset, b.length) let offset = 0 const sequenceCount = dataView.getUint32(offset, le) @@ -135,12 +115,11 @@ export default class TwoBitFile { // const reserved = dataView.getUint32(offset, le) offset += 4 const indexData = [] + const decoder = new TextDecoder('utf8') for (let i = 0; i < sequenceCount; i++) { const nameLength = dataView.getUint8(offset) offset += 1 - const name = buffer - .subarray(offset, offset + nameLength) - .toString() as string + const name = decoder.decode(b.subarray(offset, offset + nameLength)) offset += nameLength if (header.version === 1) { const dataOffset = Number(dataView.getBigUint64(offset, le)) @@ -159,7 +138,7 @@ export default class TwoBitFile { } /** - * @returns {Promise} for an array of string sequence names that are found in the file + * @returns for an array of string sequence names that are found in the file */ async getSequenceNames() { const index = await this.getIndex() @@ -167,8 +146,8 @@ export default class TwoBitFile { } /** - * @returns {Promise} for an object listing the lengths of all sequences like - * `{seqName: length, ...}`. + * @returns object listing the lengths of all sequences like `{seqName: + * length, ...}`. 
* * note: this is a relatively slow operation especially if there are many * refseqs in the file, if you can get this information from a different file @@ -177,9 +156,9 @@ export default class TwoBitFile { async getSequenceSizes() { const index = await this.getIndex() const seqNames = Object.keys(index) - const sizes = await Promise.all(Object.values(index).map(offset => - this._getSequenceSize(offset), - )) + const sizes = await Promise.all( + Object.values(index).map(offset => this._getSequenceSize(offset)), + ) const returnObject = {} as Record for (const [index_, seqName] of seqNames.entries()) { returnObject[seqName] = sizes[index_] @@ -188,16 +167,14 @@ export default class TwoBitFile { } /** - * @param {string} seqName name of the sequence - * @returns {Promise} for the sequence's length, or undefined if it is not in the file + * @param seqName name of the sequence + * + * @returns sequence length, or undefined if it is not in the file */ async getSequenceSize(seqName: string) { const index = await this.getIndex() const offset = index[seqName] - if (!offset) { - return undefined - } - return this._getSequenceSize(offset) + return offset ? 
this._getSequenceSize(offset) : undefined } async _getSequenceSize(offset: number) { @@ -205,13 +182,7 @@ export default class TwoBitFile { } async _record1(offset2: number, len = 8) { - const { buffer } = await this.filehandle.read( - Buffer.allocUnsafe(len), - 0, - len, - offset2, - ) - const b = buffer + const b = await this.filehandle.read(len, offset2) const le = true let offset = 0 const dataView = new DataView(b.buffer, b.byteOffset, b.length) @@ -224,13 +195,7 @@ export default class TwoBitFile { } async _record2(offset2: number, len: number) { - const { buffer } = await this.filehandle.read( - Buffer.allocUnsafe(len), - 0, - len, - offset2, - ) - const b = buffer + const b = await this.filehandle.read(len, offset2) const le = true let offset = 0 const dataView = new DataView(b.buffer, b.byteOffset, b.length) @@ -257,13 +222,7 @@ export default class TwoBitFile { } } async _record3(offset2: number, len: number) { - const { buffer } = await this.filehandle.read( - Buffer.allocUnsafe(len), - 0, - len, - offset2, - ) - const b = buffer + const b = await this.filehandle.read(len, offset2) const le = true let offset = 0 const dataView = new DataView(b.buffer, b.byteOffset, b.length) @@ -314,10 +273,15 @@ export default class TwoBitFile { } /** - * @param {string} seqName name of the sequence you want - * @param {number} [regionStart] optional 0-based half-open start of the sequence region to fetch. - * @param {number} [regionEnd] optional 0-based half-open end of the sequence region to fetch. defaults to end of the sequence - * @returns {Promise} for a string of sequence bases + * @param seqName name of the sequence you want + * + * @param [regionStart] optional 0-based half-open start of the sequence + * region to fetch. + * + * @param [regionEnd] optional 0-based half-open end of the sequence region + * to fetch. 
defaults to end of the sequence + * + * @returns for a string of sequence bases */ async getSequence( seqName: string, @@ -353,14 +317,10 @@ export default class TwoBitFile { record.maskBlocks.sizes, ) - const baseBytes = Buffer.allocUnsafe( - Math.ceil((regionEnd - regionStart) / 4) + 1, - ) + const baseBytesLen = Math.ceil((regionEnd - regionStart) / 4) + 1 const baseBytesOffset = Math.floor(regionStart / 4) - const { buffer } = await this.filehandle.read( - baseBytes, - 0, - baseBytes.length, + const buffer = await this.filehandle.read( + baseBytesLen, record.dnaPosition + baseBytesOffset, ) diff --git a/yarn.lock b/yarn.lock index 6d10f19..6bd453d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -969,11 +969,6 @@ balanced-match@^1.0.0: resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== -base64-js@^1.3.1: - version "1.5.1" - resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" - integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== - binary-extensions@^2.0.0: version "2.3.0" resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.3.0.tgz#f6e14a97858d327252200242d4ccfe522c445522" @@ -1011,14 +1006,6 @@ browserslist@^4.24.0, browserslist@^4.24.2: node-releases "^2.0.18" update-browserslist-db "^1.1.1" -buffer@^6.0.3: - version "6.0.3" - resolved "https://registry.yarnpkg.com/buffer/-/buffer-6.0.3.tgz#2ace578459cc8fbe2a70aaa8f52ee63b6a74c6c6" - integrity sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA== - dependencies: - base64-js "^1.3.1" - ieee754 "^1.2.1" - builtin-modules@^3.3.0: version "3.3.0" resolved 
"https://registry.yarnpkg.com/builtin-modules/-/builtin-modules-3.3.0.tgz#cae62812b89801e9656336e46223e030386be7b6" @@ -1545,11 +1532,6 @@ es-to-primitive@^1.2.1: is-date-object "^1.0.5" is-symbol "^1.0.4" -es6-promisify@^6.1.1: - version "6.1.1" - resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-6.1.1.tgz#46837651b7b06bf6fff893d03f29393668d01621" - integrity sha512-HBL8I3mIki5C1Cc9QjKUenHtnG0A5/xA8Q/AllRcfiwl2CZFXGK7ddBiCoRwAix4i2KxcQfjtIVcrVbB3vbmwg== - esbuild@^0.21.3: version "0.21.5" resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.21.5.tgz#9ca301b120922959b766360d8ac830da0d02997d" @@ -1909,12 +1891,10 @@ functions-have-names@^1.2.3: resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834" integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== -generic-filehandle@^3.0.0: - version "3.2.0" - resolved "https://registry.yarnpkg.com/generic-filehandle/-/generic-filehandle-3.2.0.tgz#f65401ce71bccadb796335495c3d9250185876d4" - integrity sha512-tG6ZGpKVQn1N6WLlOFoDZL54wdrBSelY3Mk3R9nTPYas0odoY9LcGvUJmb9jGghFC4hy3WY8EqQUIQk0ni/0jg== - dependencies: - es6-promisify "^6.1.1" +generic-filehandle2@^0.0.1: + version "0.0.1" + resolved "https://registry.yarnpkg.com/generic-filehandle2/-/generic-filehandle2-0.0.1.tgz#7f26ee54a939ed588d6bdb3a453bb2255ccd2be9" + integrity sha512-cySnWoVmNUSkRztAwlghNVAYXUh+VVy/fxn8tT3jZIo8UQEHkYL7ueSUseBZrwqBCq9n06Wp/F4xv2q2/SwYCQ== gensync@^1.0.0-beta.2: version "1.0.0-beta.2" @@ -2262,11 +2242,6 @@ html-void-elements@^2.0.0: resolved "https://registry.yarnpkg.com/html-void-elements/-/html-void-elements-2.0.1.tgz#29459b8b05c200b6c5ee98743c41b979d577549f" integrity sha512-0quDb7s97CfemeJAnW9wC0hw78MtW7NU3hqtCD75g2vFlDLt36llsYD7uB7SUzojLMP24N5IatXf7ylGXiGG9A== -ieee754@^1.2.1: - version "1.2.1" - resolved 
"https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352" - integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA== - ignore@^5.2.0, ignore@^5.3.1: version "5.3.2" resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.2.tgz#3cd40e729f3643fd87cb04e50bf0eb722bc596f5" @@ -3644,7 +3619,7 @@ prelude-ls@^1.2.1: resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396" integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g== -prettier@^3.3.3: +prettier@^3.4.2: version "3.4.2" resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.4.2.tgz#a5ce1fb522a588bf2b78ca44c6e6fe5aa5a2b13f" integrity sha512-e9MewbtFo+Fevyuxn/4rrcDAaq0IYxPGLvObpQjiZBMAzB9IGmzlnG9RZy3FFas+eBMu2vA0CszMeduow5dIuQ==