ft: 代码调整

3 年之前 · 5fb27cf696
--- a/VelocyPack.md
+++ b/VelocyPack.md
@ -24,97 +24,67 @@ indicates the type (and often the length) of the VPack value at hand:
 We first give an overview with a brief but accurate description for
 reference, for arrays and objects see below for details:

  - 0x00      : none - this indicates absence of any type and value,
                this is not allowed in VPack values
  - 0x01      : empty array
  - 0x02      : array without index table (all subitems have the same
                byte length), 1-byte byte length
  - 0x03      : array without index table (all subitems have the same
                byte length), 2-byte byte length
  - 0x04      : array without index table (all subitems have the same
                byte length), 4-byte byte length
  - 0x05      : array without index table (all subitems have the same
                byte length), 8-byte byte length
  - 0x06      : array with 1-byte index table offsets, bytelen and # subvals
  - 0x07      : array with 2-byte index table offsets, bytelen and # subvals
  - 0x08      : array with 4-byte index table offsets, bytelen and # subvals
  - 0x09      : array with 8-byte index table offsets, bytelen and # subvals
  - 0x0a      : empty object
  - 0x0b      : object with 1-byte index table offsets, sorted by
                attribute name, 1-byte bytelen and # subvals
  - 0x0c      : object with 2-byte index table offsets, sorted by
                attribute name, 2-byte bytelen and # subvals
  - 0x0d      : object with 4-byte index table offsets, sorted by
                attribute name, 4-byte bytelen and # subvals
  - 0x0e      : object with 8-byte index table offsets, sorted by
                attribute name, 8-byte bytelen and # subvals
  - 0x0f      : object with 1-byte index table offsets, not sorted by
                attribute name, 1-byte bytelen and # subvals
  - 0x10      : object with 2-byte index table offsets, not sorted by
                attribute name, 2-byte bytelen and # subvals
  - 0x11      : object with 4-byte index table offsets, not sorted by
                attribute name, 4-byte bytelen and # subvals
  - 0x12      : object with 8-byte index table offsets, not sorted by
                attribute name, 8-byte bytelen and # subvals
  - 0x13      : compact array, no index table
  - 0x14      : compact object, no index table
  - 0x15-0x16 : reserved
  - 0x17      : illegal - this type can be used to indicate a value that
                is illegal in the embedding application
  - 0x18      : null
  - 0x19      : false
  - 0x1a      : true
  - 0x1b      : double IEEE-754, 8 bytes follow, stored as little
                endian uint64 equivalent
  - 0x1c      : UTC-date in milliseconds since the epoch, stored as 8 byte
                signed int, little endian, two's complement
  - 0x1d      : external (only in memory): a char* pointing to the actual
                place in memory, where another VPack item resides, not
                allowed in VPack values on disk or on the network
  - 0x1e      : minKey, nonsensical value that compares < than all other values
  - 0x1f      : maxKey, nonsensical value that compares > than all other values
  - 0x20-0x27 : signed int, little endian, 1 to 8 bytes, number is V - 0x1f,
                two's complement
  - 0x28-0x2f : uint, little endian, 1 to 8 bytes, number is V - 0x27
  - 0x30-0x39 : small integers 0, 1, ... 9
  - 0x3a-0x3f : small negative integers -6, -5, ..., -1
  - 0x40-0xbe : UTF-8-string, using V - 0x40 bytes (not Unicode characters!),
                length 0 is possible, so 0x40 is the empty string,
                maximal length is 126, note that strings here are not
                zero-terminated and may contain NUL bytes
  - 0xbf      : long UTF-8-string, next 8 bytes are length of string in
                bytes (not Unicode characters) as little endian unsigned
                integer, note that long strings are not zero-terminated
                and may contain NUL bytes
  - 0xc0-0xc7 : binary blob, next V - 0xbf bytes are the length of blob in
                bytes, note that binary blobs are not zero-terminated
  - 0xc8-0xcf : positive long packed BCD-encoded float, V - 0xc7 bytes follow
                that encode in a little endian way the length of the
                mantissa in bytes. Directly after that follow 4 bytes
                encoding the (power of 10) exponent, by which the mantissa
                is to be multiplied, stored as little endian two's
                complement signed 32-bit integer. After that, as many
                bytes follow as the length information at the beginning
                has specified, each byte encodes two digits in
                big-endian packed BCD
                Example: 12345 decimal can be encoded as
                         0xc8 0x03 0x00 0x00 0x00 0x00 0x01 0x23 0x45
                      or 0xc8 0x03 0xff 0xff 0xff 0xff 0x12 0x34 0x50
  - 0xd0-0xd7 : negative long packed BCD-encoded float, V - 0xcf bytes
                follow that encode in a little endian way the length of
                the mantissa in bytes. After that, same as positive long
                packed BCD-encoded float above.
  - 0xd8-0xed : reserved
  - 0xee-0xef : value tagging for logical types
  - 0xf0-0xff : custom types
 - `0x00`        : none - this indicates absence of any type and value, this is not allowed in VPack values
 - `0x01`        : empty array
 - `0x02`        : array without index table (all subitems have the same byte length), 1-byte byte length
 - `0x03`        : array without index table (all subitems have the same byte length), 2-byte byte length
 - `0x04`        : array without index table (all subitems have the same byte length), 4-byte byte length
 - `0x05`        : array without index table (all subitems have the same byte length), 8-byte byte length
 - `0x06`        : array with 1-byte index table offsets, bytelen and # subvals
 - `0x07`        : array with 2-byte index table offsets, bytelen and # subvals
 - `0x08`        : array with 4-byte index table offsets, bytelen and # subvals
 - `0x09`        : array with 8-byte index table offsets, bytelen and # subvals
 - `0x0a`        : empty object
 - `0x0b`        : object with 1-byte index table offsets, sorted by attribute name, 1-byte bytelen and # subvals
 - `0x0c`        : object with 2-byte index table offsets, sorted by attribute name, 2-byte bytelen and # subvals
 - `0x0d`        : object with 4-byte index table offsets, sorted by attribute name, 4-byte bytelen and # subvals
 - `0x0e`        : object with 8-byte index table offsets, sorted by attribute name, 8-byte bytelen and # subvals
 - `0x0f`        : object with 1-byte index table offsets, not sorted by attribute name, 1-byte bytelen and # subvals
 - `0x10`        : object with 2-byte index table offsets, not sorted by attribute name, 2-byte bytelen and # subvals
 - `0x11`        : object with 4-byte index table offsets, not sorted by attribute name, 4-byte bytelen and # subvals
 - `0x12`        : object with 8-byte index table offsets, not sorted by attribute name, 8-byte bytelen and # subvals
 - `0x13`        : compact array, no index table
 - `0x14`        : compact object, no index table
 - `0x15`-`0x16` : reserved
 - `0x17`        : illegal - this type can be used to indicate a value that is illegal in the embedding application
 - `0x18`        : null
 - `0x19`        : false
 - `0x1a`        : true
 - `0x1b`        : double IEEE-754, 8 bytes follow, stored as little endian uint64 equivalent
 - `0x1c`        : UTC-date in milliseconds since the epoch, stored as 8 byte signed int, little endian, two's complement
 - `0x1d`        : external (only in memory): a char* pointing to the actual place in memory, where another VPack item
  resides, not allowed in VPack values on disk or on the network
 - `0x1e`        : minKey, nonsensical value that compares < than all other values
 - `0x1f`        : maxKey, nonsensical value that compares > than all other values
 - `0x20`-`0x27` : signed int, little endian, 1 to 8 bytes, number is V - `0x1f`, two's complement
 - `0x28`-`0x2f` : uint, little endian, 1 to 8 bytes, number is V - `0x27`
 - `0x30`-`0x39` : small integers 0, 1, ... 9
 - `0x3a`-`0x3f` : small negative integers -6, -5, ..., -1
 - `0x40`-`0xbe` : UTF-8-string, using V - `0x40` bytes (not Unicode characters!), length 0 is possible, so `0x40` is the
  empty string, maximal length is 126, note that strings here are not zero-terminated and may contain NUL bytes
 - `0xbf`        : long UTF-8-string, next 8 bytes are length of string in bytes (not Unicode characters) as little
  endian unsigned integer, note that long strings are not zero-terminated and may contain NUL bytes
 - `0xc0`-`0xc7` : binary blob, next V - `0xbf` bytes are the length of blob in bytes, note that binary blobs are not
  zero-terminated
 - `0xc8`-`0xcf` : positive long packed BCD-encoded float, V - `0xc7` bytes follow that encode in a little endian way the
  length of the mantissa in bytes. Directly after that follow 4 bytes encoding the (power of 10) exponent, by which the
  mantissa is to be multiplied, stored as little endian two's complement signed 32-bit integer. After that, as many
  bytes follow as the length information at the beginning has specified, each byte encodes two digits in big-endian
  packed BCD. Example: 12345 decimal can be encoded as
  `c8 03 00 00 00 00 01 23 45` or
  `c8 03 ff ff ff ff 12 34 50`
 - `0xd0`-`0xd7` : negative long packed BCD-encoded float, V - `0xcf` bytes follow that encode in a little endian way the
  length of the mantissa in bytes. After that, same as positive long packed BCD-encoded float above.
 - `0xd8`-`0xed` : reserved
 - `0xee`-`0xef` : value tagging for logical types
 - `0xf0`-`0xff` : custom types


 ## Arrays

 Empty arrays are simply a single byte 0x01.
 Empty arrays are simply a single byte `0x01`.

 We next describe the type cases 0x02 to 0x09, see below for the
 special compact type 0x13.
 We next describe the type cases `0x02` to `0x09`, see below for the special compact type `0x13`.

 Non-empty arrays look like one of the following:

@ -157,64 +127,48 @@ or
    INDEXTABLE with 8 byte per entry
    NRITEMS in 8 bytes

 If any optional padding is allowed for a type, the padding must consist
 of exactly that many bytes that the length of the padding, the length of
 BYTELENGTH and the length of NRITEMS (if present) sums up to 8. If the
 length of BYTELENGTH is already 8, there is no padding allowed. The
 entire padding must consist of zero bytes (ASCII NUL). 
 If any optional padding is allowed for a type, the padding must consist of exactly that many bytes that the length of
 the padding, the length of BYTELENGTH and the length of NRITEMS (if present) sums up to 8. If the length of BYTELENGTH
 is already 8, there is no padding allowed. The entire padding must consist of zero bytes (ASCII NUL).

 Numbers (for byte length, number of subvalues and offsets in the
 INDEXTABLE) are little endian unsigned integers, using 1 byte for
 types 0x02 and 0x06, 2 bytes for types 0x03 and 0x07, 4 bytes for types
 0x04 and 0x08, and 8 bytes for types 0x05 and 0x09.
 Numbers (for byte length, number of subvalues and offsets in the INDEXTABLE) are little endian unsigned integers, using
 1 byte for types `0x02` and `0x06`, 2 bytes for types `0x03` and `0x07`, 4 bytes for types
 `0x04` and `0x08`, and 8 bytes for types `0x05` and `0x09`.

 NRITEMS is a single number as described above.

 The INDEXTABLE consists of:
  - for types 0x06-0x09 an array of offsets (unaligned, in the number
    format described above) earlier offsets reside at lower addresses.
    Offsets are measured from the start of the VPack value.


 Non-empty arrays of types 0x06 to 0x09 have a small header including
 their byte length, the number of subvalues, then all the subvalues and
 finally an index table containing offsets to the subvalues. To find the
 index table, find the number of subvalues, then the end, and from that
 the base of the index table, considering how wide its entries are.

 For types 0x02 to 0x05 there is no offset table and no number of items.
 The first item begins at address A+2, A+3, A+5 or respectively A+9,
 depending on the type and thus the width of the byte length field. Note
 the following special rule: The actual position of the first subvalue
 is allowed to be further back, after some run of padding zero bytes.

 For example, if 2 bytes are used for both the byte length (BYTELENGTH),
 then an optional padding of 4 zero bytes is then allowed to follow, and
 the actual VPack subvalues can start at A+9. 
 This is to give a program that builds a VPack value the opportunity to
 reserve 8 bytes in the beginning and only later find out that fewer bytes
 suffice to write the byte length. One can determine the number of
 subvalues by finding the first subvalue, its byte length, and
 dividing the amount of available space by it.

 For types 0x06 to 0x09 the offset table describes where the subvalues
 reside. It is not necessary for the subvalues to start immediately after
 the number of subvalues field. 

 As above, it is allowed to include optional padding. Again here, any
 padding must consist of a run of consecutive zero bytes (ASCII NUL) and
 must be as long that it fills up the length of BYTELENGTH and the length 
 of NRITEMS to 8.

 For example, if both BYTELENGTH and NRITEMS can be expressed using 2 bytes
 each, the sum of their lengths is 4. It is therefore allowed to add 4
 bytes of padding here, so that the first subvalue could be at address A+9.

 There is one exception for the 8-byte numbers case (type 0x05): 
 In this case the number of elements is moved behind the index table. 
 This is to get away without moving memory when one has reserved 8 bytes 
 in the beginning and later noticed that all 8 bytes are needed for the
 byte length. For this case it is not allowed to include any padding.

 - for types `0x06`-`0x09` an array of offsets (unaligned, in the number format described above) earlier offsets reside
  at lower addresses. Offsets are measured from the start of the VPack value.

 Non-empty arrays of types `0x06` to `0x09` have a small header including their byte length, the number of subvalues,
 then all the subvalues and finally an index table containing offsets to the subvalues. To find the index table, find the
 number of subvalues, then the end, and from that the base of the index table, considering how wide its entries are.

 For types `0x02` to `0x05` there is no offset table and no number of items. The first item begins at address A+2, A+3,
 A+5 or respectively A+9, depending on the type and thus the width of the byte length field. Note the following special
 rule: The actual position of the first subvalue is allowed to be further back, after some run of padding zero bytes.

 For example, if 2 bytes are used for both the byte length (BYTELENGTH), then an optional padding of 4 zero bytes is then
 allowed to follow, and the actual VPack subvalues can start at A+9. This is to give a program that builds a VPack value
 the opportunity to reserve 8 bytes in the beginning and only later find out that fewer bytes suffice to write the byte
 length. One can determine the number of subvalues by finding the first subvalue, its byte length, and dividing the
 amount of available space by it.

 For types `0x06` to `0x09` the offset table describes where the subvalues reside. It is not necessary for the subvalues
 to start immediately after the number of subvalues field.

 As above, it is allowed to include optional padding. Again here, any padding must consist of a run of consecutive zero
 bytes (ASCII NUL) and must be as long that it fills up the length of BYTELENGTH and the length of NRITEMS to 8.

 For example, if both BYTELENGTH and NRITEMS can be expressed using 2 bytes each, the sum of their lengths is 4. It is
 therefore allowed to add 4 bytes of padding here, so that the first subvalue could be at address A+9.

 There is one exception for the 8-byte numbers case (type `0x05`):
 In this case the number of elements is moved behind the index table. This is to get away without moving memory when one
 has reserved 8 bytes in the beginning and later noticed that all 8 bytes are needed for the byte length. For this case
 it is not allowed to include any padding.

 All offsets are measured from base A.

@ -250,23 +204,19 @@ possible, though not necessarily advised to use:
    0b 00 00 00 00 00 00 00
    03 00 00 00 00 00 00 00

 Note that it is not recommended to encode short arrays in too long a
 format.
 Note that it is not recommended to encode short arrays in too long a format.

 We now describe the special type 0x13, which is useful for a
 particularly compact array representation. Note that to some extent this
 goes against the principles of the VelocyPack format, since quick access
 to subvalues is no longer possible, all items in the array must be
 scanned to find a particular one. However, there are certain use cases
 for VelocyPack which only require sequential access (for example JSON
 dumping) and have a particular need for compactness.
 We now describe the special type `0x13`, which is useful for a particularly compact array representation. Note that to
 some extent this goes against the principles of the VelocyPack format, since quick access to subvalues is no longer
 possible, all items in the array must be scanned to find a particular one. However, there are certain use cases for
 VelocyPack which only require sequential access (for example JSON dumping) and have a particular need for compactness.

 The overall format of this array type is

  0x13 as type byte
  BYTELENGTH
  sub VPack values
  NRITEMS
    0x13 as type byte
    BYTELENGTH
    sub VPack values
    NRITEMS

 There is no index table at all, although the sub VelocyPack values can
 have different byte sizes. The BYTELENGTH and NRITEMS are encoded in a
@ -297,24 +247,22 @@ Here is an example, the array [1, 16] can be encoded as follows:

 ## Objects

 Empty objects are simply a single byte 0x0a.
 Empty objects are simply a single byte `0x0a`.

 We next describe the type cases 0x0b to 0x12, see below for the
 special compact type 0x14.
 We next describe the type cases `0x0b` to `0x12`, see below for the special compact type `0x14`.

 Non-empty objects look like this:

  one of 0x0b - 0x12
  BYTELENGTH
  optional NRITEMS
  sub VPack values as pairs of attribute and value
  optional INDEXTABLE
  NRITEMS for the 8-byte case
    one of 0x0b - 0x12
    BYTELENGTH
    optional NRITEMS
    sub VPack values as pairs of attribute and value
    optional INDEXTABLE
    NRITEMS for the 8-byte case

 Numbers (for byte length, number of subvalues and offsets in the
 INDEXTABLE) are little endian unsigned integers, using 1 byte for
 types 0x0b and 0x0f, 2 bytes for types 0x0c and 0x10, 4 bytes for types
 0x0d and 0x11, and 8 bytes for types 0x0e and 0x12.
 Numbers (for byte length, number of subvalues and offsets in the INDEXTABLE) are little endian unsigned integers, using
 1 byte for types `0x0b` and `0x0f`, 2 bytes for types `0x0c` and `0x10`, 4 bytes for types
 `0x0d` and `0x11`, and 8 bytes for types `0x0e` and `0x12`.

 NRITEMS is a single number as described above.

@ -323,48 +271,36 @@ The INDEXTABLE consists of:
    above) earlier offsets reside at lower addresses.
    Offsets are measured from the beginning of the VPack value.

 Non-empty objects have a small header including their byte length, the
 number of subvalues, then all the subvalues and finally an index table
 containing offsets to the subvalues. To find the index table, find
 number of subvalues, then the end, and from that the base of the index
 table, considering how wide its entries are.
 Non-empty objects have a small header including their byte length, the number of subvalues, then all the subvalues and
 finally an index table containing offsets to the subvalues. To find the index table, find number of subvalues, then the
 end, and from that the base of the index table, considering how wide its entries are.

 For all types the offset table describes where the subvalues reside. It
 is not necessary for the subvalues to start immediately after the number
 of subvalues field. For performance reasons when building the value, it
 could be desirable to reserve 8 bytes for the byte length and the number
 of subvalues and not fill the gap, even though it turns out later that
 offsets and thus the byte length only uses 2 bytes, say.
 For all types the offset table describes where the subvalues reside. It is not necessary for the subvalues to start
 immediately after the number of subvalues field. For performance reasons when building the value, it could be desirable
 to reserve 8 bytes for the byte length and the number of subvalues and not fill the gap, even though it turns out later
 that offsets and thus the byte length only uses 2 bytes, say.

 There is one special case: the empty object is simply stored as the
 single byte 0x0a.
 There is one special case: the empty object is simply stored as the single byte `0x0a`.

 There is another exception: For 8-byte numbers (0x12) the number of
 subvalues is stored behind the INDEXTABLE. This is to get away without
 moving memory when one has reserved 8 bytes in the beginning and later
 noticed that all 8 bytes are needed for the byte length.
 There is another exception: For 8-byte numbers (`0x12`) the number of subvalues is stored behind the INDEXTABLE. This is
 to get away without moving memory when one has reserved 8 bytes in the beginning and later noticed that all 8 bytes are
 needed for the byte length.

 All offsets are measured from base A.

 Each entry consists of two parts, the key and the value, they are
 encoded as normal VPack values as above, the first is always a short or
 long UTF-8 string starting with a byte 0x40-0xbf as described below. The
 second is any other VPack value.

 There is one extension: For the key it is possible to use the positive
 small integer values 0x30-0x39 or an unsigned integer starting with a
 type byte of 0x28-0x2f. Any such integer value is an index into an
 outside-given table of attribute names. These are convenient when only
 very few attribute names occur or some are repeated very often. The
 standard way to encode such an attribute name table is as a VPack array
 of strings as specified here.

 Objects are always stored with sorted key/value pairs, sorted by bytewise 
 comparions of the keys on each nesting level. Sorting has some overhead 
 but will allow looking up keys in logarithmic time later. Note that only the 
 index table needs to be sorted, it is not required that the offsets in 
 these tables are increasing. Since the index table resides after the actual 
 subvalues, one can build up a complex VPack value by writing linearly.
 Each entry consists of two parts, the key and the value, they are encoded as normal VPack values as above, the first is
 always a short or long UTF-8 string starting with a byte `0x40`-`0xbf` as described below. The second is any other VPack
 value.

 There is one extension: For the key it is possible to use the positive small integer values `0x30`-`0x39` or an unsigned
 integer starting with a type byte of `0x28`-`0x2f`. Any such integer value is an index into an outside-given table of
 attribute names. These are convenient when only very few attribute names occur or some are repeated very often. The
 standard way to encode such an attribute name table is as a VPack array of strings as specified here.

 Objects are always stored with sorted key/value pairs, sorted by bytewise comparisons of the keys on each nesting level.
 Sorting has some overhead but will allow looking up keys in logarithmic time later. Note that only the index table needs
 to be sorted, it is not required that the offsets in these tables are increasing. Since the index table resides after
 the actual subvalues, one can build up a complex VPack value by writing linearly.

 Example: the object `{"a": 12, "b": true, "c": "xyz"}` can have the hexdump:

@ -386,53 +322,43 @@ entries, as in this example:
    41 63 43 78 79 7a
    0c 00 00 00 09 00 00 00 10 00 00 00

 Similarly with type 0x0c and 2-byte offsets, byte length and number of
 subvalues, or with type 0x0e and 8-byte numbers.
 Similarly with type `0x0c` and 2-byte offsets, byte length and number of subvalues, or with type `0x0e` and 8-byte
 numbers.

 Note that it is not recommended to encode short objects with too long
 index tables.
 Note that it is not recommended to encode short objects with too long index tables.

 ### Special compact objects

 We now describe the special type 0x14, which is useful for a
 particularly compact object representation. Note that to some extent
 this goes against the principles of the VelocyPack format, since quick
 access to subvalues is no longer possible, all key/value pairs in the
 object must be scanned to find a particular one. However, there are
 certain use cases for VelocyPack which only require sequential access
 We now describe the special type `0x14`, which is useful for a particularly compact object representation. Note that to
 some extent this goes against the principles of the VelocyPack format, since quick access to subvalues is no longer
 possible, all key/value pairs in the object must be scanned to find a particular one. However, there are certain use
 cases for VelocyPack which only require sequential access
 (for example JSON dumping) and have a particular need for compactness.

 The overall format of this object type is

  0x14 as type byte
  BYTELENGTH
  sub VPack key/value pairs
  NRPAIRS
    0x14 as type byte
    BYTELENGTH
    sub VPack key/value pairs
    NRPAIRS

 There is no index table at all, although the sub VelocyPack values can
 have different byte sizes. The BYTELENGTH and NRPAIRS are encoded in a
 special format, which we describe now. It is the same as for the special
 compact array type 0x13, which we repeat here for the sake of
 completeness.
 There is no index table at all, although the sub VelocyPack values can have different byte sizes. The BYTELENGTH and
 NRPAIRS are encoded in a special format, which we describe now. It is the same as for the special compact array
 type `0x13`, which we repeat here for the sake of completeness.

 The BYTELENGTH consists of 1 to 8 bytes, of which all but the last one
 have their high bit set. Thus, the high bits determine, how many bytes
 are actually used. The lower 7 bits of all these bits together comprise
 the actual byte length in a little endian fashion. That is, the byte at
 address A+1 contains the least significant 7 bits (0 to 6) of the byte
 length, the following byte at address A+2 contains the bits 7 to 13, and
 so on. Since the total number of bytes is limited to 8, this encodes
 unsigned integers of up to 56 bits, which is the overall limit for the
 size of such a compact array representation.

 The NRPAIRS entry is encoded essentially the same, except that it
 is laid out in reverse order in memory. That is, one has to use the
 BYTELENGTH to find the end of the array value and go back bytes until
 one finds a byte with high bit reset. The last byte (at the highest
 memory address) contains the least significant 7 bits of the NRPAIRS
 value, the second one bits 7 to 13 and so on.
 The BYTELENGTH consists of 1 to 8 bytes, of which all but the last one have their high bit set. Thus, the high bits
 determine, how many bytes are actually used. The lower 7 bits of all these bits together comprise the actual byte length
 in a little endian fashion. That is, the byte at address A+1 contains the least significant 7 bits (0 to 6) of the byte
 length, the following byte at address A+2 contains the bits 7 to 13, and so on. Since the total number of bytes is
 limited to 8, this encodes unsigned integers of up to 56 bits, which is the overall limit for the size of such a compact
 array representation.

 Here is an example, the object {"a":1, "b":16} can be encoded as follows:
 The NRPAIRS entry is encoded essentially the same, except that it is laid out in reverse order in memory. That is, one
 has to use the BYTELENGTH to find the end of the array value and go back bytes until one finds a byte with high bit
 reset. The last byte (at the highest memory address) contains the least significant 7 bits of the NRPAIRS value, the
 second one bits 7 to 13 and so on.

 Here is an example, the object `{"a":1, "b":16}` can be encoded as follows:

    14 0a
    41 61 31 42 62 28 10
@ -441,21 +367,18 @@ Here is an example, the object {"a":1, "b":16} can be encoded as follows:

 ## Doubles

 Type 0x1b indicates a double IEEE-754 value using the 8 bytes following
 the type byte. To guarantee platform-independentness the details of the
 byte order are as follows. Encoding is done by using memcpy to copy the
 internal double value to an uint64\_t. This 64-bit unsigned integer is
 then stored as little endian 8 byte integer in the VPack value. Decoding
 works in the opposite direction. This should sort out the undetermined
 byte order in IEEE-754 in practice.
 Type `0x1b` indicates a double IEEE-754 value using the 8 bytes following the type byte. To guarantee
 platform-independentness the details of the byte order are as follows. Encoding is done by using memcpy to copy the
 internal double value to an uint64\_t. This 64-bit unsigned integer is then stored as little endian 8 byte integer in
 the VPack value. Decoding works in the opposite direction. This should sort out the undetermined byte order in IEEE-754
 in practice.


 ## Dates

 Type 0x1c indicates a signed 64-int integer stored in 8 bytes little
 endian two's complement notation directly after the type. The value means
 a universal UTC-time measured in milliseconds since the epoch, which is
 00:00 on 1 January 1970 UTC.
 Type `0x1c` indicates a signed 64-int integer stored in 8 bytes little endian two's complement notation directly after
 the type. The value means a universal UTC-time measured in milliseconds since the epoch, which is 00:00 on 1 January
 1970 UTC.


 ## External VPack values
@ -465,54 +388,43 @@ or network. Therefore, we only need to specify that the following k
 bytes are the memcpy of a char* on the current architecture. That char*
 points to the actual VPack value elsewhere in memory.

 ## Artificial minimal and maximal keys

 ## Artifical minimal and maximal keys

 These values of types 0x1e and 0x1f have no meaning other than comparing
 smaller or greater respectively than any other VPack value. The idea is
 that these can be used in systems that define a total order on all VPack
 values to specify left or right ends of infinite intervals.
 These values of types `0x1e` and `0x1f` have no meaning other than comparing smaller or greater respectively than any
 other VPack value. The idea is that these can be used in systems that define a total order on all VPack values to
 specify left or right ends of infinite intervals.


 ## Integer types

 There are different ways to specify integers. For small values -6 to 9
 inclusively there are specific type bytes in the range 0x30 to 0x3f to
 allow for storage in a single byte. After that there are signed and
 unsigned integer types that can code in the type byte the number of
 bytes used (ranges 0x20-0x27 for signed and 0x28-0x2f for unsigned).

 There are different ways to specify integers. For small values -6 to 9 inclusively there are specific type bytes in the
 range `0x30` to `0x3f` to allow for storage in a single byte. After that there are signed and unsigned integer types
 that can code in the type byte the number of bytes used (ranges `0x20`-`0x27` for signed and `0x28`-`0x2f` for unsigned)
 .

 ## Null and boolean values

 These three values use a single byte to store the corresponding JSON
 values.

 These three values use a single byte to store the corresponding JSON values.

 ## Strings

 Strings are stored as UTF-8 encoded byte sequences. There are two
 variants, a short one and a long one. In the short one, the byte length
 (not the number of UTF-8 characters) is directly encoded in the type,
 and this works up to and including byte length 126. Types 0x40 to 0xbe
 are used for this and the byte length is V - 0x3f, if V is the type
 byte. For strings longer than 126 bytes, the type byte is 0xbf and the
 byte length of the string is stored in the first 8 bytes after the type
 byte, using a little endian unsigned integer representation. The actual
 string follows after these 8 bytes. There is no terminating zero byte in
 either case and the string may contain zero bytes.
 Strings are stored as UTF-8 encoded byte sequences. There are two variants, a short one and a long one. In the short
 one, the byte length
 (not the number of UTF-8 characters) is directly encoded in the type, and this works up to and including byte length

 126. Types `0x40` to `0xbe`
     are used for this and the byte length is V - `0x3f`, if V is the type byte. For strings longer than 126 bytes, the
     type byte is `0xbf` and the byte length of the string is stored in the first 8 bytes after the type byte, using a
     little endian unsigned integer representation. The actual string follows after these 8 bytes. There is no
     terminating zero byte in either case and the string may contain zero bytes.

 ## Binary data

 The type bytes 0xc0 to 0xc7 allow to store arbitrary binary byte
 sequences as a VPack value. The format is as follows: If V is the type
 byte, then V - 0xbf bytes follow it to make a little endian unsigned
 integer representing the length of the binary data, which directly
 follows these length bytes. No alignment is guaranteed. The content is
 The type bytes `0xc0` to `0xc7` allow to store arbitrary binary byte sequences as a VPack value. The format is as
 follows: If V is the type byte, then V - `0xbf` bytes follow it to make a little endian unsigned integer representing
 the length of the binary data, which directly follows these length bytes. No alignment is guaranteed. The content is
 entirely up to the user.


 ## Packed BCD long floats

 These types are used to represent arbitrary precision decimal numbers.
@ -524,57 +436,44 @@ format of these values is:
    EXPONENT (as 4-byte little endian signed two's complement integer)
    MANTISSA (as packed BCD-encoded integer, big-endian)

 The type byte describes the sign of the number as well as the number of
 bytes used to specify the byte length of the mantissa. As usual, if V is
 the type byte, then V - 0xc7 (in the positive case) or V - 0xcf (in the
 negative case) bytes are used for the length of the mantissa, stored as
 little endian unsigned integer directly after the byte length. After
 this follow exactly 4 bytes (little endian signed two's complement
 integer) to specify the exponent. After the exponent, the actual
 mantissa bytes follow.
 The type byte describes the sign of the number as well as the number of bytes used to specify the byte length of the
 mantissa. As usual, if V is the type byte, then V - `0xc7` (in the positive case) or V - `0xcf` (in the negative case)
 bytes are used for the length of the mantissa, stored as little endian unsigned integer directly after the byte length.
 After this follow exactly 4 bytes (little endian signed two's complement integer) to specify the exponent. After the
 exponent, the actual mantissa bytes follow.

 Packed BCD is used, so that each byte stores exactly 2 decimal digits as
 in 0x34 for the decimal digits 34. Therefore, the mantissa always has an
 even number of decimal digits. Note that the mantissa is stored in big
 endian form, to make parsing and dumping efficient. This leads to the
 "unholy nibble problem": When a JSON parser sees the beginning of a
 longish number, it does not know whether an even or odd number of digits
 follow. However, for efficiency reasons it wants to start writing bytes
 to the output as it reads the input. This is, where the exponent comes
 to the rescue, which is illustrated by the following example:
 Packed BCD is used, so that each byte stores exactly 2 decimal digits as in `0x34` for the decimal digits 34. Therefore,
 the mantissa always has an even number of decimal digits. Note that the mantissa is stored in big endian form, to make
 parsing and dumping efficient. This leads to the
 "unholy nibble problem": When a JSON parser sees the beginning of a longish number, it does not know whether an even or
 odd number of digits follow. However, for efficiency reasons it wants to start writing bytes to the output as it reads
 the input. This is, where the exponent comes to the rescue, which is illustrated by the following example. 12345 decimal
 can be encoded as:

    12345 decimal can be encoded as:
    c8 03 00 00 00 00 01 23 45
    c8 03 ff ff ff ff 12 34 50

    0xc8 0x03 0x00 0x00 0x00 0x00 0x01 0x23 0x45
    0xc8 0x03 0xff 0xff 0xff 0xff 0x12 0x34 0x50
 The former encoding puts a leading 0 in the first byte and uses exponent 0, the latter encoding directly starts putting
 two decimal digits in one byte and then in the end has to "erase" the trailing 0 by using exponent -1, encoded by the 4
 byte sequence `ff ff ff ff`.

 The former encoding puts a leading 0 in the first byte and uses exponent
 0, the latter encoding directly starts putting two decimal digits in one
 byte and then in the end has to "erase" the trailing 0 by using exponent
 -1, encoded by the 4 byte sequence 0xff 0xff 0xff 0xff.

 There for the unholy nibble problem is solved and parsing (and indeed
 dumping) can be efficient.
 Therefore, the unholy nibble problem is solved and parsing (and indeed dumping) can be efficient.


 ## Tagging

 Types 0xee-0xef are used for tagging of values to implement logical
 types.
 Types `0xee`-`0xef` are used for tagging of values to implement logical types.

 For example, if type 0x1c did not exist, the database driver could
 serialize a timestamp object (Date in JavaScript, Instant in Java, etc)
 into a Unix timestamp, a 64-bit integer. Assuming the lack of schema,
 upon deserialization it would not be possible to tell an integer from
 a timestamp and deserialize the value accordingly.
 For example, if type `0x1c` did not exist, the database driver could serialize a timestamp object (Date in JavaScript,
 Instant in Java, etc)
 into a Unix timestamp, a 64-bit integer. Assuming the lack of schema, upon deserialization it would not be possible to
 tell an integer from a timestamp and deserialize the value accordingly.

 Type tagging resolves this by attaching an integer tag to values that
 can then be read when deserializing the value, e.g. that tag=1 is a
 timestamp and the relevant timestamp class should be used.
 Type tagging resolves this by attaching an integer tag to values that can then be read when deserializing the value,
 e.g. that tag=1 is a timestamp and the relevant timestamp class should be used.

 The tag values are specified separately and applications can also
 specify their own to have the database driver deserialize their specific
 data types into the appropriate classes (including models).
 The tag values are specified separately and applications can also specify their own to have the database driver
 deserialize their specific data types into the appropriate classes (including models).

 Essentially this is object-relational mapping for parts of documents.

@ -600,22 +499,47 @@ to derive the byte length of each custom data type.

 The following user-defined types exist:

  - 0xf0      : 1 byte payload, directly following the type byte
  - 0xf1      : 2 bytes payload, directly following the type byte
  - 0xf2      : 4 bytes payload, directly following the type byte
  - 0xf3      : 8 bytes payload, directly following the type byte
  - 0xf4-0xf6 : length of the payload is described by a single further 
                unsigned byte directly following the type byte, the
                payload of that many bytes follows
  - 0xf7-0xf9 : length of the payload is described by two bytes (little 
                endian unsigned integer) directly following the type
                byte, the payload of that many bytes follows
  - 0xfa-0xfc : length of the payload is described by four bytes (little 
                endian unsigned integer) directly following the type
                byte, the payload of that many bytes follows
  - 0xfd-0xff : length of the payload is described by eight bytes (little 
                endian unsigned integer) directly following the type
                byte, the payload of that many bytes follows

 Note: In types 0xf4 to 0xff the "payload" refers to the actual data not
 including the length specification.
 - `0xf0`        : 1 byte payload, directly following the type byte
 - `0xf1`        : 2 bytes payload, directly following the type byte
 - `0xf2`        : 4 bytes payload, directly following the type byte
 - `0xf3`        : 8 bytes payload, directly following the type byte
 - `0xf4`-`0xf6` : length of the payload is described by a single further unsigned byte directly following the type byte,
  the payload of that many bytes follows
 - `0xf7`-`0xf9` : length of the payload is described by two bytes (little endian unsigned integer) directly following
  the type byte, the payload of that many bytes follows
 - `0xfa`-`0xfc` : length of the payload is described by four bytes (little endian unsigned integer) directly following
  the type byte, the payload of that many bytes follows
 - `0xfd`-`0xff` : length of the payload is described by eight bytes (little endian unsigned integer) directly following
  the type byte, the payload of that many bytes follows

 Note: In types `0xf4` to `0xff` the "payload" refers to the actual data not including the length specification.

 ## Portability

 Serialized booleans, integers, strings, arrays, objects etc. all have a defined endianess and length, which is
 platform-independent. These types are fully portable in serialized VelocyPack.

 There are still a few caveats when it comes to portability:

 It is possible to build up very large values on a 64 bit system, but it may not be possible to read them back on a 32
 bit system. This is because the maximum memory allocation size on a 32 bit system may be severely limited compared to a
 64 bit system, i.e. a 32 bit OS may simply not allow to allocate buffers larger than 4 GB. This is not a limitation of
 VelocyPack, but a limitation of 32 bit architectures. If all VelocyPack values are kept small enough so that they are
 well below the 32 bit length boundaries, this should not matter though.

 The VelocyPack type *External* contains just a raw pointer to memory, which should only be used during the buildup of
 VelocyPack values in memory. The *External* type is not supposed to be used in VelocyPack values that are serialized and
 stored persistently, and then later read back from persistence. Doing it anyway is not portable and will also pose a
 security risk. Not using the *External* type for any data that is serialized will avoid this problem entirely.

 The VelocyPack type *Custom* is completely user-defined, and there is no default implementation for them. So it is up to
 the embedder to make these custom type bindings portable if portability of them is a concern.

 VelocyPack *Double* values are serialized as integer equivalents in a specific way, and unserialized back into integers
 that overlay a IEEE-754 double-precision floating point value in memory. We found this to be sufficiently portable for
 our needs, although at least in theory there may be portability issues with some systems.

 The [following](https://en.wikipedia.org/wiki/Endianness#Floating_point) was used as a backing for our "reasonably
 portable in the real world" assumptions:

 > It may therefore appear strange that the widespread IEEE 754 floating-point standard does not specify endianness.[17] Theoretically, this means that even standard IEEE floating-point data written by one machine might not be readable by another. However, on modern standard computers (i.e., implementing IEEE 754), one may in practice safely assume that the endianness is the same for floating-point numbers as for integers, making the conversion straightforward regardless of data type. 
--- a/VelocyPack_zh.md
+++ b/VelocyPack_zh.md
@ -36,7 +36,7 @@
    0x19：25 false
    0x1a：26 true
    0x1b：27 双IEEE-754，后跟8个字节，存储为与uint64等效的小字节序
    0x1c：28 UTC日期（自纪元以来），以8字节有符号int，little endian，二进制补码形式存储
    0x1c：28 UTC日期（自纪元以来）毫秒数，以8字节有符号int，little endian，二进制补码形式存储
    0x1d：29 外部（仅在内存中）：一个char *，指向另一个VPack项在内存中的实际位置，磁盘或网络上的VPack值中不允许
    0x1e：30 minKey，比较所有其他值<的无意义的值
    0x1f：31 maxKey，与所有其他值相比>的无意义值
@ -320,4 +320,23 @@
    0xf7-0xf9：有效负载的长度由紧随类型字节之后的两个字节（小尾数无符号整数）描述，该字节的有效负载紧随其后
    0xfa-0xfc：有效载荷的长度由紧随类型字节之后的四个字节（小尾数无符号整数）描述，该字节的有效载荷如下
    0xfd-0xff：有效载荷的长度由紧随类型字节之后的八个字节（小尾数无符号整数）描述，该字节的有效载荷如下
    注意：在类型0xf4至0xff中，“有效负载”是指不包括长度说明的实际数据。
    注意：在类型0xf4至0xff中，“有效负载”是指不包括长度说明的实际数据。

 ## Portability

 	序列化的布尔值、整数、字符串、数组、对象等都具有定义的字节序和长度，这与平台无关。这些类型在序列化的 VelocyPack 中是完全可移植的。

 	在可移植性方面仍有一些注意事项：
 	
 	在 64 位系统上可以建立非常大的值，但在 32 位系统上可能无法读取它们。这是因为与 64 位系统相比，32 位系统上的最大内存分配大小可能受到严重限制，即 32 位操作系统可能根本不允许分配大于 4 GB 的缓冲区。这不是 VelocyPack 的限制，而是 32 位架构的限制。如果所有 VelocyPack 值都保持足够小，以至于它们远低于 32 位长度边界，但这应该无关紧要。
 	
 	VelocyPack 类型External只包含一个指向内存的原始指针，它应该只在内存中 VelocyPack 值的构建期间使用。在外部类型是不应该在被序列化和永久存储，再后来从持续读回VelocyPack值使用。无论如何这样做是不可移植的，还会带来安全风险。不对任何序列化的数据使用External类型将完全避免这个问题。
 	
 	VelocyPack 类型Custom完全由用户定义，并且没有默认实现。因此，如果它们的可移植性是一个问题，则由嵌入器来使这些自定义类型绑定具有可移植性。
 	
 	VelocyPack Double值以特定方式序列化为整数等价物，并反序列化为覆盖内存中 IEEE-754 双精度浮点值的整数。我们发现这足以满足我们的需求，尽管至少在理论上某些系统可能存在可移植性问题。
 	
 	在下面被用作我们的“现实世界中的合理便携式”假设一个后盾：
 	
 	因此，广泛使用的 IEEE 754 浮点标准没有指定字节顺序可能会显得很奇怪。 [17] 从理论上讲，这意味着即使是由一台机器编写的标准 IEEE 浮点数据也可能无法被另一台机器读取。然而，在现代标准计算机（即实现 IEEE 754）上，人们实际上可以安全地假设浮点数与整数的字节序相同，从而使转换变得简单，而不管数据类型如何。
 	
--- a/include/eVPack.hrl
+++ b/include/eVPack.hrl
@ -10,8 +10,8 @@
 -define(VpArrNc, 0).          %% 不压缩编码
 -define(VpArrYc, 1).          %% 压缩编码

 -define(VpObjDef, 0).         %% 默认选项 排序Obj key  Obj不压缩
 -define(VpArrDef, 0).         %% 默认选项 Arr不压缩排序
 -define(VpArrDef, ?VpArrNc).         %% 默认选项 Arr不压缩排序
 -define(VpObjDef, ?VpObjNcYs).         %% 默认选项 排序Obj key  Obj不压缩

 -define(VpAllOpts(Arr, Obj), Obj bsl 1 bor Arr).      %% 拼装Obj 与 Arr选项
 -define(VpObjOpts(VpAllOpts), VpAllOpts bsr 1).        %% 获取Obj选项
--- a/rebar.config
+++ b/rebar.config
@ -1,4 +1,4 @@
 {erl_opts, [debug_info, {i, "include"}]}.
 {deps, [
   {eSync, ".*", {git, "http://47.108.26.175:53000/SisMaker/eSync.git", {branch, "master"}}}
   {eSync, ".*", {git, "http://sismaker.tpddns.cn:53000/SisMaker/eSync.git", {branch, "master"}}}
 ]}.
--- a/src/eVPack.app.src
+++ b/src/eVPack.app.src
@ -1,10 +1,10 @@
 {application, eVPack,
 [{description, "An OTP library"},
    {vsn, "0.1.0"},
    {registered, []},
    {applications, [kernel, stdlib]},
    {env, []},
    {modules, []},
    {licenses, ["MIT"]},
    {links, []}
 ]}.
   [{description, "ArangoDB VelocyPack erlang's libiary"},
      {vsn, "0.1.0"},
      {registered, []},
      {applications, [kernel, stdlib]},
      {env, []},
      {modules, []},
      {licenses, ["MIT"]},
      {links, []}
   ]}.
--- a/src/eVPack.erl
+++ b/src/eVPack.erl
@ -226,7 +226,7 @@ doEncodeSortMap(Iterator, ArrOpt, ObjOpt, AccList, Offsets, SumSize) ->
         KeyStr = asKey(Key),
         {KeyEn, KeySize} = encodeString(KeyStr),
         {ValueEn, ValueSize} = encoder(Value, ArrOpt, ObjOpt),
         doEncodeSortMap(NextIter, ArrOpt, ObjOpt, [ValueEn, KeyEn | AccList], [{KeyStr, SumSize} | Offsets], SumSize + KeySize + ValueSize);
         doEncodeSortMap(NextIter, ArrOpt, ObjOpt, [ValueEn, KeyEn | AccList], [SumSize | Offsets], SumSize + KeySize + ValueSize);
      none ->
         {AccList, Offsets, SumSize}
   end.
@ -318,13 +318,13 @@ buildIndexTable_8(Offsets, StartSize) ->
   <<<<(OneOff + StartSize):8/integer-little-unsigned-unit:8>> || OneOff <- lists:reverse(Offsets)>>.

 buildSMIndexTable_1(Offsets, StartSize) ->
   <<<<(OneOff + StartSize):1/integer-little-unsigned-unit:8>> || {_, OneOff} <- lists:sort(Offsets)>>.
   <<<<(OneOff + StartSize):1/integer-little-unsigned-unit:8>> || OneOff <- lists:sort(Offsets)>>.
 buildSMIndexTable_2(Offsets, StartSize) ->
   <<<<(OneOff + StartSize):2/integer-little-unsigned-unit:8>> || {_, OneOff} <- lists:sort(Offsets)>>.
   <<<<(OneOff + StartSize):2/integer-little-unsigned-unit:8>> || OneOff <- lists:sort(Offsets)>>.
 buildSMIndexTable_4(Offsets, StartSize) ->
   <<<<(OneOff + StartSize):4/integer-little-unsigned-unit:8>> || {_, OneOff} <- lists:sort(Offsets)>>.
   <<<<(OneOff + StartSize):4/integer-little-unsigned-unit:8>> || OneOff <- lists:sort(Offsets)>>.
 buildSMIndexTable_8(Offsets, StartSize) ->
   <<<<(OneOff + StartSize):8/integer-little-unsigned-unit:8>> || {_, OneOff} <- lists:sort(Offsets)>>.
   <<<<(OneOff + StartSize):8/integer-little-unsigned-unit:8>> || OneOff <- lists:sort(Offsets)>>.

 compactIntegerList(Integer, AccList) ->
   case Integer < 128 of