set.go 7.09 KB
package ksuid

import (
	"bytes"
	"encoding/binary"
)

// CompressedSet is an immutable data type which stores a set of KSUIDs.
type CompressedSet []byte

// Iter returns an iterator that produces all KSUIDs in the set.
func (set CompressedSet) Iter() CompressedSetIter {
	return CompressedSetIter{
		content: []byte(set),
	}
}

// String satisfies the fmt.Stringer interface, returns a human-readable string
// representation of the set.
func (set CompressedSet) String() string {
	b := bytes.Buffer{}
	b.WriteByte('[')
	set.writeTo(&b)
	b.WriteByte(']')
	return b.String()
}

// String satisfies the fmt.GoStringer interface, returns a Go representation of
// the set.
func (set CompressedSet) GoString() string {
	b := bytes.Buffer{}
	b.WriteString("ksuid.CompressedSet{")
	set.writeTo(&b)
	b.WriteByte('}')
	return b.String()
}

func (set CompressedSet) writeTo(b *bytes.Buffer) {
	a := [27]byte{}

	for i, it := 0, set.Iter(); it.Next(); i++ {
		if i != 0 {
			b.WriteString(", ")
		}
		b.WriteByte('"')
		it.KSUID.Append(a[:0])
		b.Write(a[:])
		b.WriteByte('"')
	}
}

// Compress creates and returns a compressed set of KSUIDs from the list given
// as arguments.
func Compress(ids ...KSUID) CompressedSet {
	c := 1 + byteLength + (len(ids) / 5)
	b := make([]byte, 0, c)
	return AppendCompressed(b, ids...)
}

// AppendCompressed uses the given byte slice as pre-allocated storage space to
// build a KSUID set.
//
// Note that the set uses a compression technique to store the KSUIDs, so the
// resuling length is not 20 x len(ids). The rule of thumb here is for the given
// byte slice to reserve the amount of memory that the application would be OK
// to waste.
func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
	if len(ids) != 0 {
		if !IsSorted(ids) {
			Sort(ids)
		}
		one := makeUint128(0, 1)

		// The first KSUID is always written to the set, this is the starting
		// point for all deltas.
		set = append(set, byte(rawKSUID))
		set = append(set, ids[0][:]...)

		timestamp := ids[0].Timestamp()
		lastKSUID := ids[0]
		lastValue := uint128Payload(ids[0])

		for i := 1; i != len(ids); i++ {
			id := ids[i]

			if id == lastKSUID {
				continue
			}

			t := id.Timestamp()
			v := uint128Payload(id)

			if t != timestamp {
				d := t - timestamp
				n := varintLength32(d)

				set = append(set, timeDelta|byte(n))
				set = appendVarint32(set, d, n)
				set = append(set, id[timestampLengthInBytes:]...)

				timestamp = t
			} else {
				d := sub128(v, lastValue)

				if d != one {
					n := varintLength128(d)

					set = append(set, payloadDelta|byte(n))
					set = appendVarint128(set, d, n)
				} else {
					l, c := rangeLength(ids[i+1:], t, id, v)
					m := uint64(l + 1)
					n := varintLength64(m)

					set = append(set, payloadRange|byte(n))
					set = appendVarint64(set, m, n)

					i += c
					id = ids[i]
					v = uint128Payload(id)
				}
			}

			lastKSUID = id
			lastValue = v
		}
	}
	return CompressedSet(set)
}

func rangeLength(ids []KSUID, timestamp uint32, lastKSUID KSUID, lastValue uint128) (length int, count int) {
	one := makeUint128(0, 1)

	for i := range ids {
		id := ids[i]

		if id == lastKSUID {
			continue
		}

		if id.Timestamp() != timestamp {
			count = i
			return
		}

		v := uint128Payload(id)

		if sub128(v, lastValue) != one {
			count = i
			return
		}

		lastKSUID = id
		lastValue = v
		length++
	}

	count = len(ids)
	return
}

func appendVarint128(b []byte, v uint128, n int) []byte {
	c := v.bytes()
	return append(b, c[len(c)-n:]...)
}

func appendVarint64(b []byte, v uint64, n int) []byte {
	c := [8]byte{}
	binary.BigEndian.PutUint64(c[:], v)
	return append(b, c[len(c)-n:]...)
}

func appendVarint32(b []byte, v uint32, n int) []byte {
	c := [4]byte{}
	binary.BigEndian.PutUint32(c[:], v)
	return append(b, c[len(c)-n:]...)
}

func varint128(b []byte) uint128 {
	a := [16]byte{}
	copy(a[16-len(b):], b)
	return makeUint128FromPayload(a[:])
}

func varint64(b []byte) uint64 {
	a := [8]byte{}
	copy(a[8-len(b):], b)
	return binary.BigEndian.Uint64(a[:])
}

func varint32(b []byte) uint32 {
	a := [4]byte{}
	copy(a[4-len(b):], b)
	return binary.BigEndian.Uint32(a[:])
}

func varintLength128(v uint128) int {
	if v[1] != 0 {
		return 8 + varintLength64(v[1])
	}
	return varintLength64(v[0])
}

func varintLength64(v uint64) int {
	switch {
	case (v & 0xFFFFFFFFFFFFFF00) == 0:
		return 1
	case (v & 0xFFFFFFFFFFFF0000) == 0:
		return 2
	case (v & 0xFFFFFFFFFF000000) == 0:
		return 3
	case (v & 0xFFFFFFFF00000000) == 0:
		return 4
	case (v & 0xFFFFFF0000000000) == 0:
		return 5
	case (v & 0xFFFF000000000000) == 0:
		return 6
	case (v & 0xFF00000000000000) == 0:
		return 7
	default:
		return 8
	}
}

func varintLength32(v uint32) int {
	switch {
	case (v & 0xFFFFFF00) == 0:
		return 1
	case (v & 0xFFFF0000) == 0:
		return 2
	case (v & 0xFF000000) == 0:
		return 3
	default:
		return 4
	}
}

const (
	rawKSUID     = 0
	timeDelta    = (1 << 6)
	payloadDelta = (1 << 7)
	payloadRange = (1 << 6) | (1 << 7)
)

// CompressedSetIter is an iterator type returned by Set.Iter to produce the
// list of KSUIDs stored in a set.
//
// Here's is how the iterator type is commonly used:
//
//	for it := set.Iter(); it.Next(); {
//		id := it.KSUID
//		// ...
//	}
//
// CompressedSetIter values are not safe to use concurrently from multiple
// goroutines.
type CompressedSetIter struct {
	// KSUID is modified by calls to the Next method to hold the KSUID loaded
	// by the iterator.
	KSUID KSUID

	content []byte
	offset  int

	seqlength uint64
	timestamp uint32
	lastValue uint128
}

// Next moves the iterator forward, returning true if there a KSUID was found,
// or false if the iterator as reached the end of the set it was created from.
func (it *CompressedSetIter) Next() bool {
	if it.seqlength != 0 {
		value := incr128(it.lastValue)
		it.KSUID = value.ksuid(it.timestamp)
		it.seqlength--
		it.lastValue = value
		return true
	}

	if it.offset == len(it.content) {
		return false
	}

	b := it.content[it.offset]
	it.offset++

	const mask = rawKSUID | timeDelta | payloadDelta | payloadRange
	tag := int(b) & mask
	cnt := int(b) & ^mask

	switch tag {
	case rawKSUID:
		off0 := it.offset
		off1 := off0 + byteLength

		copy(it.KSUID[:], it.content[off0:off1])

		it.offset = off1
		it.timestamp = it.KSUID.Timestamp()
		it.lastValue = uint128Payload(it.KSUID)

	case timeDelta:
		off0 := it.offset
		off1 := off0 + cnt
		off2 := off1 + payloadLengthInBytes

		it.timestamp += varint32(it.content[off0:off1])

		binary.BigEndian.PutUint32(it.KSUID[:timestampLengthInBytes], it.timestamp)
		copy(it.KSUID[timestampLengthInBytes:], it.content[off1:off2])

		it.offset = off2
		it.lastValue = uint128Payload(it.KSUID)

	case payloadDelta:
		off0 := it.offset
		off1 := off0 + cnt

		delta := varint128(it.content[off0:off1])
		value := add128(it.lastValue, delta)

		it.KSUID = value.ksuid(it.timestamp)
		it.offset = off1
		it.lastValue = value

	case payloadRange:
		off0 := it.offset
		off1 := off0 + cnt

		value := incr128(it.lastValue)
		it.KSUID = value.ksuid(it.timestamp)
		it.seqlength = varint64(it.content[off0:off1])
		it.offset = off1
		it.seqlength--
		it.lastValue = value

	default:
		panic("KSUID set iterator is reading malformed data")
	}

	return true
}