diff --git a/packages/pg-protocol/src/buffer-writer.ts b/packages/pg-protocol/src/buffer-writer.ts index cebb0d9ed..d206f322a 100644 --- a/packages/pg-protocol/src/buffer-writer.ts +++ b/packages/pg-protocol/src/buffer-writer.ts @@ -58,6 +58,26 @@ export class Writer { return this } + // Write an Int32 byte-length prefix immediately followed by the string's UTF-8 + // bytes. Postgres' Bind wire format prefixes every parameter with its length, + // and doing it in one method computes Buffer.byteLength ONCE — the previous + // `addInt32(Buffer.byteLength(s)).addString(s)` pairing scanned the string + // three times (byteLength for the prefix, byteLength again inside addString, + // then the encode), which is costly for large text parameters. + public addInt32PrefixedString(string: string): Writer { + const len = Buffer.byteLength(string) + this.ensure(4 + len) + const buffer = this.buffer + let offset = this.offset + buffer[offset++] = (len >>> 24) & 0xff + buffer[offset++] = (len >>> 16) & 0xff + buffer[offset++] = (len >>> 8) & 0xff + buffer[offset++] = (len >>> 0) & 0xff + buffer.write(string, offset, 'utf-8') + this.offset = offset + len + return this + } + public add(otherBuffer: Buffer): Writer { this.ensure(otherBuffer.length) otherBuffer.copy(this.buffer, this.offset) diff --git a/packages/pg-protocol/src/outbound-serializer.test.ts b/packages/pg-protocol/src/outbound-serializer.test.ts index 0d3e387e4..856ead7b9 100644 --- a/packages/pg-protocol/src/outbound-serializer.test.ts +++ b/packages/pg-protocol/src/outbound-serializer.test.ts @@ -129,6 +129,28 @@ describe('serializer', () => { .join(true, 'B') assert.deepEqual(actual, expectedBuffer) }) + + it('encodes a multi-byte string param with its UTF-8 byte length, not char length', function () { + // Guards the single-call addInt32PrefixedString write path: the Int32 + // length prefix must be the UTF-8 byte count, not String.length. 'héllo中🎉' + // is 7 code points / 8 UTF-16 code units but 13 UTF-8 bytes. 
+ const value = 'héllo中🎉' + const bytes = Buffer.from(value, 'utf8') + assert.notEqual(bytes.length, value.length) // sanity: the divergence we're testing + const actual = serialize.bind({ values: [value] }) + const expectedBuffer = new BufferList() + .addCString('') // portal + .addCString('') // statement + .addInt16(1) // param format code count + .addInt16(0) // format code for the one value (text) + .addInt16(1) // value count + .addInt32(bytes.length) // 13 — the UTF-8 byte length, NOT value.length (8) + .add(bytes) + .addInt16(1) // result format code count + .addInt16(0) // result format (text) + .join(true, 'B') + assert.deepEqual(actual, expectedBuffer) + }) }) it('with custom valueMapper', function () { diff --git a/packages/pg-protocol/src/serializer.ts b/packages/pg-protocol/src/serializer.ts index bb0441f56..137daad79 100644 --- a/packages/pg-protocol/src/serializer.ts +++ b/packages/pg-protocol/src/serializer.ts @@ -48,7 +48,7 @@ const password = (password: string): Buffer => { const sendSASLInitialResponseMessage = function (mechanism: string, initialResponse: string): Buffer { // 0x70 = 'p' - writer.addCString(mechanism).addInt32(Buffer.byteLength(initialResponse)).addString(initialResponse) + writer.addCString(mechanism).addInt32PrefixedString(initialResponse) return writer.flush(code.startup) } @@ -135,8 +135,8 @@ const writeValues = function (values: any[], valueMapper?: ValueMapper): void { } else { // add the param type (string) to the writer writer.addInt16(ParamType.STRING) - paramWriter.addInt32(Buffer.byteLength(mappedVal)) - paramWriter.addString(mappedVal) + // length prefix + UTF-8 bytes in one call (Buffer.byteLength computed once) + paramWriter.addInt32PrefixedString(mappedVal) } } }