Skip to content

Commit

Permalink
support BINUNICODE8 and BINBYTES8
Browse files Browse the repository at this point in the history
  • Loading branch information
ewfian committed Apr 16, 2023
1 parent 1410ac9 commit 24acf03
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 30 deletions.
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,26 @@
[![NPM Version](https://img.shields.io/npm/v/pickleparser?logo=npm)](https://www.npmjs.com/package/pickleparser)
[![License](https://img.shields.io/github/license/ewfian/pickleparser)](https://github.com/ewfian/pickleparser)

A pure Typescript implemented parser for [Python pickle format](https://docs.python.org/3.11/library/pickle.html)
A parser for the [Python pickle format](https://docs.python.org/3.11/library/pickle.html), implemented in pure JavaScript


## Features

* Pure Typescript implemented.
* Most of [Pickle protocol version 4](https://peps.python.org/pep-3154/) opcodes supported.
* Fully supports [Pickle protocol version 4](https://peps.python.org/pep-3154/) opcodes.
* Supports Browser.
* Supports Node.js.
* Provides tool to convert pickle file to JSON.

## Supported Opcodes
See: [Supported Opcodes](./SUPPORTED_OPCODES.md)
## Supported Protocol Version

* Pickle protocol version 0
* Pickle protocol version 1
* [Pickle protocol version 2 (Python 2.3)](https://peps.python.org/pep-0307/)
* Pickle protocol version 3 (Python 3.0)
* [Pickle protocol version 4 (Python 3.4)](https://peps.python.org/pep-3154/)

For more details, see: [Supported Opcodes](./SUPPORTED_OPCODES.md)

## Installation

Expand Down
4 changes: 2 additions & 2 deletions SUPPORTED_OPCODES.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@
| BINBYTES | B | 66 || Protocol 3 |
| SHORT_BINBYTES | C | 67 || Protocol 3 |
| SHORT_BINUNICODE | \x8c | 140 || Protocol 4 |
| BINUNICODE8 | \x8d | 141 | | Protocol 4 |
| BINBYTES8 | \x8e | 142 | | Protocol 4 |
| BINUNICODE8 | \x8d | 141 | | Protocol 4 |
| BINBYTES8 | \x8e | 142 | | Protocol 4 |
| EMPTY_SET | \x8f | 143 || Protocol 4 |
| ADDITEMS | \x90 | 144 || Protocol 4 |
| FROZENSET | \x91 | 145 || Protocol 4 |
Expand Down
8 changes: 8 additions & 0 deletions examples/bytes8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import pickle
# Hand-assembled protocol-4 pickle used as a fixture for the BINBYTES8 opcode:
# PROTO 4 (\x80\x04), BINBYTES8 (\x8e), an 8-byte little-endian length of 4,
# the four payload bytes, then STOP ('.').
bytes8 = b'\x80\x04\x8e\4\0\0\0\0\0\0\0\xe2\x82\xac\x00.'

print(bytes8)

# Write the raw bytes to disk; the context manager closes the file even if
# the write raises, which the manual open()/close() pair did not guarantee.
with open(b"bytes8.pkl", "wb") as filehandler:
    filehandler.write(bytes8)
13 changes: 6 additions & 7 deletions examples/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@ async function unpickle(fname) {
parser.registry.register('pathlib', 'PosixPath', PosixPath);
const obj = parser.load();
console.log(obj);
// =>
// PObject {
// data: 'test',
// set: [ false, 1, 2, 3, null, 'abc', 4294967295, 9007199254740991 ],
// fruits: [ 'apple', 'banana', 'cherry', 'orange' ]
// }
const codePoints = Array.from(obj)
.map((v) => v.codePointAt(0).toString(16))
.map((hex) => '\\u' + hex.padStart(4, 0) + '')
.join('');
console.log(codePoints);
}

unpickle('long4.pkl');
unpickle('u8.pkl');
8 changes: 8 additions & 0 deletions examples/u8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import pickle
# Hand-assembled protocol-4 pickle used as a fixture for the BINUNICODE8 opcode:
# PROTO 4 (\x80\x04), BINUNICODE8 (\x8d), an 8-byte little-endian length of 4,
# four bytes of UTF-8 payload, then STOP ('.').
u8 = b'\x80\x04\x8d\4\0\0\0\0\0\0\0\xe2\x82\xac\x01.'

print(u8)

# Write the raw bytes to disk; the context manager closes the file even if
# the write raises, which the manual open()/close() pair did not guarantee.
with open(b"u8.pkl", "wb") as filehandler:
    filehandler.write(u8)
26 changes: 9 additions & 17 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@ export class Parser {
case OP.SHORT_BINBYTES:
stack.push(reader.bytes(reader.byte()));
break;
case OP.BINBYTES8: {
stack.push(reader.bytes(reader.uint64()));
break;
}
case OP.BINSTRING:
stack.push(reader.string(reader.uint32(), 'ascii'));
break;
Expand All @@ -188,6 +192,9 @@ export class Parser {
case OP.SHORT_BINUNICODE:
stack.push(reader.string(reader.byte(), 'utf-8'));
break;
case OP.BINUNICODE8:
stack.push(reader.string(reader.uint64(), 'utf-8'));
break;

// Tuples
case OP.EMPTY_TUPLE:
Expand Down Expand Up @@ -427,23 +434,8 @@ export class Parser {
const buffer = new ArrayBuffer(8);
const uint8 = new Uint8Array(buffer);
uint8.set(data);
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/DataView#64-bit_integer_values
const view = new DataView(buffer, 0, 8);
// split 64-bit number into two 32-bit parts
const left = view.getUint32(0, true);
const right = view.getUint32(4, true);
// combine the two 32-bit values
const number = left + 2 ** 32 * right;
if (!Number.isSafeInteger(number)) {
console.warn(number, 'exceeds MAX_SAFE_INTEGER. Precision may be lost');
}
// new Uint8Array([0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) => 255,
// new Uint8Array([0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) => 65535,
// new Uint8Array([0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00]) => 4294967295,
// new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00]) => 4294967296,
// new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00]) => 1099511627776,
// new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00]) => 281474976710656,
// new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00]) => 9007199254740991, // maximum precision
const subReader = new Reader(uint8);
const number = subReader.uint64();
return number;
}

Expand Down
21 changes: 21 additions & 0 deletions src/reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,27 @@ export class Reader {
return this._dataView.getUint32(position, true);
}

/**
 * Reads an unsigned 64-bit little-endian integer starting at the current
 * position and returns it as a plain JavaScript number.
 *
 * The value is assembled from two 32-bit little-endian reads, so results
 * above Number.MAX_SAFE_INTEGER cannot be represented exactly; in that
 * case a warning is logged and the (imprecise) number is still returned.
 *
 * Expected results for some byte sequences:
 *   [0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] => 255
 *   [0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] => 65535
 *   [0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00] => 4294967295
 *   [0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00] => 4294967296
 *   [0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00] => 1099511627776
 *   [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00] => 281474976710656
 *   [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00] => 9007199254740991 (maximum precision)
 */
uint64() {
    // Remember where the value starts, then call skip(8) before reading,
    // in the same order as the other fixed-width readers in this class.
    const start = this.position;
    this.skip(8);

    // Low 32 bits come first in little-endian layout, high 32 bits follow.
    const lowWord = this._dataView.getUint32(start, true);
    const highWord = this._dataView.getUint32(start + 4, true);

    // Shift the high word up by 32 bits arithmetically (bitwise operators
    // would truncate to 32 bits) and add the low word.
    const value = highWord * 0x100000000 + lowWord;

    if (!Number.isSafeInteger(value)) {
        console.warn(value, 'exceeds MAX_SAFE_INTEGER. Precision may be lost');
    }
    return value;
}

float32() {
const position = this.position;
this.skip(4);
Expand Down

0 comments on commit 24acf03

Please sign in to comment.