nbaser

Allows for efficient encoding and decoding of bases using the full Unicode character set, supporting bases from 2 up to 256 characters in length.

This makes it an interesting base generator for specific locales, and space efficient in terms of character length by allowing up to base256 with any UTF-8 characters of your choosing.

NOTES:

This will not encode/decode bases such as standard base32/base64 with padding, and instead utilizes Bitcoin-style leading-zero compression.

Proceed with caution utilizing any unprintable unicode characters.

The Unicode support makes this implementation about an order of magnitude slower than it would be if it only supported ASCII. Check the benchmarks to verify whether it will perform sufficiently for your use case.

Types

NBaserError = object of CatchableError
  
Catchable error arising from nbaser module.   Source Edit
InvalidBaseSizeError {...}{.final.} = object of NBaserError
  
Base size is not between 2 and 256 inclusive.   Source Edit
InvalidBaseAlphabetError {...}{.final.} = object of NBaserError
  
Base has duplicate characters which can cause unexpected behaviour.   Source Edit
UnsupportedCharacterError {...}{.final.} = object of NBaserError
  
A character not part of the base was detected.   Source Edit
NonZeroCarryError {...}{.final.} = object of NBaserError
  
Failed to achieve a non-zero carry during encoding/decoding.   Source Edit
NBaserProc = proc (a: string) {...}{.inline, noSideEffect.} |
    proc (a: string): (bool, string) {...}{.inline.} |
    proc (a: string): bool {...}{.inline.} |
    proc (a: string; b: openArray[byte]; c: bool = false): string {...}{.inline.} |
    proc (a, b: string; c: bool = false): seq[byte] {...}{.inline.}

Type for any exported nbaser functions.

NOTE: This is not a mutually exclusive check. Outside procs could match one of these signatures. This was mostly for internal use, so beware of this caveat if deciding to use it.

  Source Edit

Funcs

func checkBaseValidity(baseAlphabet: string) {...}{.raises: [NBaserError], inline, tags: [].}

Runs sanity checks on the passed baseAlphabet.

Raises an NBaserError (one of InvalidBaseSizeError or InvalidBaseAlphabetError).

Examples:

try:
  checkBaseValidity("0")
  doAssert(false, "should never reach this")
except:
  discard
  Source Edit
func getBaseValidity(baseAlphabet: string): (bool, string) {...}{.inline, raises: [],
    tags: [].}

Runs sanity checks on the passed baseAlphabet.

Returns a tuple containing a boolean indicating the validity (true for valid, false for invalid), and the exception message, if available.

Examples:

doAssert getBaseValidity("") == (false, "minimum base size is 2")
doAssert getBaseValidity("0") == (false, "minimum base size is 2")
doAssert getBaseValidity("010") ==
    (false, "alphabet must not have any char dupes")
doAssert getBaseValidity("01") == (true, "")
var unicode256Char = "šŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒ"
unicode256Char &=
    "ƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃǄǅǆ"
unicode256Char &=
    "ǇǈǉǊǋǌǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰǱǲǳǴǵǶǷǸǹǺǻǼ"
unicode256Char &=
    "ǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥφχψωϊϋόύώϐϑϒϓϔ"
unicode256Char &=
    "ϕϖϗϘϙϚϛϜϝϞϟϠϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿЀЁ"
doAssert getBaseValidity(unicode256Char) == (true, "")
doAssert getBaseValidity(unicode256Char & 'X') ==
    (false, "maximum base size is 256")
doAssert getBaseValidity("abcdefABCDEF01~.,") == (true, "")
  Source Edit
func isBaseValid(baseAlphabet: string): bool {...}{.inline, raises: [], tags: [].}

Boolean-only variant of getBaseValidity that omits fetching of the exception message. It should be run before any base switch when using the default encode/decode functions, which omit the base check. A single check with this func whenever the base changes is more efficient than checking on every call.

Returns a boolean indicating true for valid and false for invalid.

Examples:

doAssert isBaseValid("") == false
doAssert isBaseValid("0") == false
doAssert isBaseValid("010") == false
doAssert isBaseValid("01") == true
var unicode256Chars = "šŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒ"
unicode256Chars &=
    "ƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃǄǅǆ"
unicode256Chars &=
    "ǇǈǉǊǋǌǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰǱǲǳǴǵǶǷǸǹǺǻǼ"
unicode256Chars &=
    "ǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥφχψωϊϋόύώϐϑϒϓϔ"
unicode256Chars &=
    "ϕϖϗϘϙϚϛϜϝϞϟϠϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿЀЁ"
doAssert isBaseValid(unicode256Chars) == true
doAssert isBaseValid("𓊽𓊸") == true
let unicode257Chars = unicode256Chars & 'X'
doAssert isBaseValid(unicode257Chars) == false
doAssert isBaseValid("abcdefABCDEF01~.,") == true
  Source Edit
func encode(baseAlphabet: string; src: openArray[byte]; checkBase: bool = false): string {...}{.
    inline, raises: [NBaserError], tags: [].}

Takes a baseAlphabet string to convert src bytes into the representative string for the base passed.

Accepts an optional checkBase bool, off by default, which controls whether a sanity check is run on the base beforehand. Turning it on is recommended when the base comes from user input. When calling encode multiple times with the same base, sanitize the base once up front using another func/proc instead, for efficiency.

Returns a string of the result.

Raises an NBaserError on exception.

Examples:

import
  unittest

const
  base2 = "01"
  base32 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
  base58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
doAssert base2.encode(@[byte 255]) == "11111111"
doAssert base2.encode(@[byte 254]) == "11111110"
doAssert base2.encode(@[byte 0, 1]) == "01"
doAssert base2.encode(@[byte 1, 0]) == "100000000"
doAssert base32.encode(@[byte 0, 0]) == "AA"
doAssert base32.encode(@[byte 0, 1]) == "AB"
doAssert base32.encode(@[byte 0, 1, 0]) == "AIA"
doAssert base58.encode(@[byte 0, 1, 2, 3, 4, 5]) == "17bWpTW"
doAssert base58.encode(@[byte 0, 0, 0, 255]) == "1115Q"
const
  invalidBase = "0102"
expect NBaserError:
  discard invalidBase.encode(@[byte 0], true)
expect InvalidBaseAlphabetError:
  discard invalidBase.encode(@[byte 0], true)
  Source Edit
func decode(baseAlphabet: string; src: string; checkBase: bool = false): seq[byte] {...}{.
    inline, raises: [NBaserError], tags: [].}

Takes a baseAlphabet string to convert the src string into its representative bytes from the base. Accepts an optional checkBase bool, off by default, which controls whether a sanity check is run on the provided base.

Returns a sequence of bytes as the result.

Raises an NBaserError on internal exception.

Examples:

import
  unittest

const
  base2 = "01"
  base32 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
  base58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
doAssert base2.decode("11111111") == @[byte 255]
doAssert base2.decode("11111110") == @[byte 254]
doAssert base2.decode("01") == @[byte 0, 1]
doAssert base2.decode("100000000") == @[byte 1, 0]
doAssert base32.decode("AA") == @[byte 0, 0]
doAssert base32.decode("AB") == @[byte 0, 1]
doAssert base32.decode("AIA") == @[byte 0, 1, 0]
doAssert base58.decode("17bWpTW") == @[byte 0, 1, 2, 3, 4, 5]
doAssert base58.decode("1115Q") == @[byte 0, 0, 0, 255]
const
  invalidBase = "0102"
expect NBaserError:
  discard invalidBase.decode("0", true)
expect InvalidBaseAlphabetError:
  discard invalidBase.decode("0", true)
  Source Edit