From ff8d6743a6d23b02f12661a112f3f73a712fe056 Mon Sep 17 00:00:00 2001 From: Byron Kam Date: Fri, 13 Dec 2024 12:46:29 -0800 Subject: [PATCH] support python shortuuid (#12) --- README.md | 14 +++++++++ doc.go | 6 ++++ pythonshort.go | 73 +++++++++++++++++++++++++++++++++++++++++++++ pythonshort_test.go | 42 ++++++++++++++++++++++++++ 4 files changed, 135 insertions(+) create mode 100644 pythonshort.go create mode 100644 pythonshort_test.go diff --git a/README.md b/README.md index 585d3e9..02c778d 100644 --- a/README.md +++ b/README.md @@ -72,3 +72,17 @@ if !ok { return } ``` + +# NCName support + +`Parse` supports automatic detection and decoding of NCName Compact UUID Base32 and Base64 encodings. + +`UUID.Compact64()` and `UUID.Compact32()` return the Base64 and Base32 NCName encoded values, respectively. + +# ShortUUID support + +`FromPythonShort` enables decoding of Python ShortUUID encoded UUIDs using the default alphabet (Base57) and padding +(22-rune length). + +`ToPythonShort` encodes a given `UUID` into a Python ShortUUID using the default alphabet (Base57) and padding +(22-rune length). diff --git a/doc.go b/doc.go index e948b16..1f2a6ab 100644 --- a/doc.go +++ b/doc.go @@ -16,6 +16,9 @@ const ( // MaxCompact64 is the canonical NCName Compact Base64 "Max" UUID. MaxCompact64 = "P____________________P" + // MaxPythonShort is the canonical "Max" Python ShortUUID. + MaxPythonShort = "oZEq7ovRbLq6UnGMPwc8B5" + // NilCanonical is the canonical RFC9562 "Nil" UUID. NilCanonical = "00000000-0000-0000-0000-000000000000" @@ -24,6 +27,9 @@ const ( // NilCompact64 is the canonical NCName Compact Base64 "Nil" UUID. NilCompact64 = "AAAAAAAAAAAAAAAAAAAAAA" + + // NilPythonShort is the canonical "Nil" Python ShortUUID. + NilPythonShort = "2222222222222222222222" ) // Version is the RFC9562 UUID Version. 
diff --git a/pythonshort.go b/pythonshort.go
new file mode 100644
index 0000000..57ee2e9
--- /dev/null
+++ b/pythonshort.go
@@ -0,0 +1,94 @@
+package uid
+
+import (
+	"math/big"
+	"strings"
+)
+
+// Parameters of the default Python ShortUUID encoding: the radix, the padded output length in runes, and
+// the alphabet, in which the index of a rune is its digit value.
+const (
+	fiftySeven     = 57
+	pythonShortLen = 22
+	b57decRef      = "23456789" + "ABCDEFGH" + "JKLMN" + "PQRSTUVWXYZ" + "abcdefghijk" + "mnopqrstuvwxyz"
+)
+
+// b57encRef maps a digit value to its rune, in the same order as b57decRef. big57 is the radix as a
+// *big.Int, allocated once for use by the encoder and decoder.
+//
+//nolint:gochecknoglobals // wtb const arrays and (u)int128
+var (
+	b57encRef = [fiftySeven]rune{
+		'2', '3', '4', '5', '6', '7', '8', '9',                               // 8/57
+		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',                               // 16/57
+		'J', 'K', 'L', 'M', 'N',                                              // 21/57
+		'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',                // 32/57
+		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',                // 43/57
+		'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', // 57/57
+	}
+	big57 = big.NewInt(fiftySeven)
+)
+
+// ToPythonShort returns the Python ShortUUID encoding of u. See https://pypi.org/project/shortuuid.
+//
+// The bytes of u are read as one big-endian integer and written in base 57, most significant digit first,
+// left-padded to 22 runes with '2', the zero digit of the alphabet.
+func ToPythonShort(u UUID) string {
+	out, q, r := pythonShortBase(), new(big.Int).SetBytes(u.b[:]), new(big.Int)
+	for i := pythonShortLen - 1; i > -1; i-- {
+		q.QuoRem(q, big57, r)
+		out[i] = b57encRef[r.Int64()]
+		// Test the sign rather than Int64: q may be wider than 64 bits, where Int64 is undefined
+		// and a non-zero quotient such as 1<<64 can read as 0, ending the loop too early.
+		if q.Sign() == 0 {
+			break
+		}
+	}
+	return string(out[:])
+}
+
+// FromPythonShort parses a UUID from Python ShortUUID encoded ps.
+//
+// Leading and trailing whitespace is ignored. The result is the zero-value UUID and false when ps is not
+// exactly 22 bytes long after trimming, contains a rune outside the default alphabet, or encodes a value
+// too large to fit in a UUID.
+func FromPythonShort(ps string) (UUID, bool) {
+	ps = strings.TrimSpace(ps)
+	if len(ps) != pythonShortLen {
+		return UUID{}, false
+	}
+	if ps == MaxPythonShort {
+		return Max(), true
+	}
+	if ps == NilPythonShort {
+		return Nil(), true
+	}
+	n := new(big.Int)
+	for _, r := range ps {
+		i := int64(strings.IndexRune(b57decRef, r))
+		if i == -1 {
+			return UUID{}, false
+		}
+		n.Mul(n, big57).Add(n, big.NewInt(i))
+	}
+	out := UUID{}
+	// 57^22 exceeds 2^128, so input made only of valid runes (e.g. 22 'z') can still overflow.
+	// FillBytes panics when the value does not fit the buffer, so reject such input here.
+	if n.BitLen() > 8*len(out.b) {
+		return UUID{}, false
+	}
+	n.FillBytes(out.b[:])
+	return out, true
+}
+
+// pythonShortBase returns a 22-rune buffer pre-filled with '2', the zero digit of the alphabet, so
+// that positions the encoder never writes come out as left padding.
+func pythonShortBase() [22]rune {
+	return [22]rune{
+		'2', '2', '2', '2', '2', // 5/22
+		'2', '2', '2', '2', '2', // 10/22
+		'2', '2', '2', '2', '2', // 15/22
+		'2', '2', '2', '2', '2', // 20/22
+		'2', '2',                // 22/22
+	}
+}
diff --git a/pythonshort_test.go b/pythonshort_test.go
new file mode 100644
index 0000000..d356f40
--- /dev/null
+++ b/pythonshort_test.go
@@ -0,0 +1,43 @@
+package uid_test
+
+import (
+	"testing"
+
+	"github.com/byron-janrain/uid"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestToPythonShort(t *testing.T) {
+	check := func(u uid.UUID, expected string) { assert.Exactly(t, expected, uid.ToPythonShort(u)) }
+	check(uid.Max(), uid.MaxPythonShort)
+	check(uid.Nil(), uid.NilPythonShort)
+	sut, ok := uid.Parse("3b1f8b40-222c-4a6e-b77e-779d5a94e21c")
+	assert.True(t, ok)
+	check(sut, "CXc85b4rqinB7s5J52TRYb")
+}
+
+func TestFromPythonShortHappy(t *testing.T) {
+	check := func(input, expected string) {
+		sut, ok := uid.FromPythonShort(input)
+		assert.True(t, ok)
+		actual := sut.String()
+		assert.Exactly(t, expected, actual)
+	}
+	check(uid.MaxPythonShort, uid.MaxCanonical)
+	check(uid.NilPythonShort, uid.NilCanonical)
+	check("CXc85b4rqinB7s5J52TRYb", "3b1f8b40-222c-4a6e-b77e-779d5a94e21c")
+	check(" CXc85b4rqinB7s5J52TRYb\t", "3b1f8b40-222c-4a6e-b77e-779d5a94e21c")
+}
+
+func TestFromPythonShortBads(t *testing.T) {
+	shouldFail := func(badInput string) {
+		u, ok := uid.FromPythonShort(badInput)
+		assert.Exactly(t, uid.Nil(), u)
+		assert.False(t, ok)
+	}
+	shouldFail("")                                           // empty
+	shouldFail("tooshort")                                   // too short
+	shouldFail("thisinputislongerthan22runes")               // too long
+	shouldFail("02222" + "22222" + "22222" + "22222" + "22") // right length, bad runes
+	shouldFail("zzzzzzzzzzzzzzzzzzzzzz")                     // right length, good runes, overflows 128 bits
+}