Skip to content

Commit

Permalink
Added multiple fingerprint algorithms (#25)
Browse files Browse the repository at this point in the history
  • Loading branch information
nrwiersma authored Aug 9, 2019
1 parent 644fd3b commit a27b932
Show file tree
Hide file tree
Showing 5 changed files with 425 additions and 5 deletions.
93 changes: 93 additions & 0 deletions pkg/crc64/crc64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Package crc64 implements the Avro CRC-64 checksum.
// See https://avro.apache.org/docs/current/spec.html#schema_fingerprints for information.
package crc64

import (
"hash"
"sync"
)

const (
	// Size is the size of a CRC-64 checksum in bytes.
	Size = 8

	// Empty is the checksum of empty input. It is the value every digest
	// starts from, and it also serves as the polynomial constant when the
	// lookup table is generated.
	Empty = 0xc15d213aa4d7a795
)

// Table is a 256-entry lookup table of 64-bit words, one entry per possible
// byte value. The entries are precomputed from the CRC-64 polynomial so that
// a checksum can be advanced one input byte at a time with a single lookup.
type Table [256]uint64

// makeTable computes the byte-indexed lookup table for the checksum.
//
// Entry b is the result of running eight shift steps on the value b. On each
// step the value is shifted right by one bit, and Empty is XORed in whenever
// the bit shifted out was set.
func makeTable() *Table {
	var tab Table
	for b := range tab {
		v := uint64(b)
		for bit := 0; bit < 8; bit++ {
			if v&1 == 1 {
				v = (v >> 1) ^ Empty
			} else {
				v >>= 1
			}
		}
		tab[b] = v
	}
	return &tab
}

var (
// tableBuildOnce guards the one-time construction of crc64Table.
tableBuildOnce sync.Once
// crc64Table is the lookup table shared by all digests. It is nil until
// buildTable has run.
crc64Table *Table
)

// buildTableOnce makes sure the shared lookup table has been built. It runs
// buildTable through tableBuildOnce, so the table is constructed at most once
// however many times this function is called.
func buildTableOnce() {
tableBuildOnce.Do(buildTable)
}

// buildTable generates the lookup table and stores it in the package-level
// crc64Table. Within this file it is only invoked via buildTableOnce.
func buildTable() {
crc64Table = makeTable()
}

// digest is the hash.Hash64 implementation returned by New.
type digest struct {
// crc is the running checksum value.
crc uint64
// tab is the lookup table Write uses to advance crc.
tab *Table
}

// New returns a hash.Hash64 that computes the Avro CRC-64 checksum.
// The returned hash starts at the Empty value, and its Sum method lays the
// checksum out in big-endian byte order.
func New() hash.Hash64 {
	// The lookup table is built lazily the first time a hash is requested.
	buildTableOnce()

	d := new(digest)
	d.crc = Empty
	d.tab = crc64Table
	return d
}

// Size returns the number of bytes in the checksum, which is always the
// package constant Size.
func (d *digest) Size() int {
return Size
}

// BlockSize returns the block size of the checksum. It is always 1.
func (d *digest) BlockSize() int {
return 1
}

// Reset returns the hash to its initial state by setting the running
// checksum back to Empty. The lookup table is left untouched.
func (d *digest) Reset() {
d.crc = Empty
}

// Write folds p into the running checksum, one byte at a time.
// It always reports len(p) bytes consumed and a nil error.
func (d *digest) Write(p []byte) (n int, err error) {
	crc, tab := d.crc, d.tab
	for _, b := range p {
		// The low byte of the checksum XORed with the input byte selects
		// the table entry; the remaining bits shift down by one byte.
		crc = (crc >> 8) ^ tab[byte(crc)^b]
	}
	d.crc = crc

	return len(p), nil
}

// Sum64 returns the current checksum as a uint64. It does not modify the
// digest.
func (d *digest) Sum64() uint64 {
return d.crc
}

// Sum appends the current checksum to in as eight bytes in big-endian order
// (most significant byte first) and returns the resulting slice. The digest
// itself is not modified.
func (d *digest) Sum(in []byte) []byte {
	v := d.Sum64()

	var buf [8]byte
	// Fill from the last position backwards so the lowest byte of the
	// checksum ends up at the end of the buffer.
	for i := len(buf) - 1; i >= 0; i-- {
		buf[i] = byte(v)
		v >>= 8
	}

	return append(in, buf[:]...)
}
115 changes: 115 additions & 0 deletions pkg/crc64/crc64_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package crc64

import (
"strconv"
"testing"

"github.com/stretchr/testify/assert"
)

// TestGolden checks Sum64 against golden values for a handful of schema
// strings.
func TestGolden(t *testing.T) {
	cases := []struct {
		in   string
		want uint64
	}{
		{in: `"null"`, want: 7195948357588979594},
		{in: `{"name":"foo","type":"fixed","size":15}`, want: 1756455273707447556},
		{in: `{"name":"foo","type":"record","fields":[{"name":"f1","type":"boolean"}]}`, want: 7843277075252814651},
	}

	// One digest is shared by every case, so each subtest resets it first.
	h := New()

	for idx, tc := range cases {
		tc := tc
		t.Run(strconv.Itoa(idx), func(t *testing.T) {
			h.Reset()
			h.Write([]byte(tc.in))

			assert.Equal(t, tc.want, h.Sum64())
		})
	}
}

// TestGoldenBytes checks the byte layout produced by Sum against golden
// values for a handful of schema strings.
func TestGoldenBytes(t *testing.T) {
	cases := []struct {
		in   string
		want []byte
	}{
		{in: `"null"`, want: []byte{0x63, 0xdd, 0x24, 0xe7, 0xcc, 0x25, 0x8f, 0x8a}},
		{in: `{"name":"foo","type":"fixed","size":15}`, want: []byte{0x18, 0x60, 0x2e, 0xc3, 0xed, 0x31, 0xa5, 0x4}},
		{in: `{"name":"foo","type":"record","fields":[{"name":"f1","type":"boolean"}]}`, want: []byte{0x6c, 0xd8, 0xea, 0xf1, 0xc9, 0x68, 0xa3, 0x3b}},
	}

	// One digest is shared by every case, so each subtest resets it first.
	h := New()

	for idx, tc := range cases {
		tc := tc
		t.Run(strconv.Itoa(idx), func(t *testing.T) {
			h.Reset()
			h.Write([]byte(tc.in))

			// Sum appends to an empty slice pre-sized for one checksum.
			got := h.Sum(make([]byte, 0, h.Size()))

			assert.Equal(t, tc.want, got)
		})
	}
}

// TestDigest_BlockSize checks that a new hash reports a block size of 1.
func TestDigest_BlockSize(t *testing.T) {
	got := New().BlockSize()

	assert.Equal(t, 1, got)
}

// bench measures the cost of hashing a payload of size bytes. Each iteration
// resets the hash, writes the whole payload and produces the checksum bytes.
func bench(b *testing.B, size int64) {
	b.SetBytes(size)

	h := New()
	sum := make([]byte, 0, h.Size())

	data := make([]byte, size)
	for i := range data {
		data[i] = byte(i)
	}

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		h.Reset()
		// hash.Hash documents that Write never returns an error.
		_, _ = h.Write(data)
		// Keep Sum's result and truncate it on the next pass. The buffer is
		// reused without allocating, and the output is not silently dropped.
		sum = h.Sum(sum[:0])
	}
}

// BenchmarkCrc64 runs the hashing benchmark over several payload sizes.
func BenchmarkCrc64(b *testing.B) {
	sizes := []struct {
		name  string
		bytes int64
	}{
		{name: "64KB", bytes: 64 << 10},
		{name: "4KB", bytes: 4 << 10},
		{name: "1KB", bytes: 1 << 10},
	}

	for _, s := range sizes {
		s := s
		b.Run(s.name, func(b *testing.B) {
			bench(b, s.bytes)
		})
	}
}
Loading

0 comments on commit a27b932

Please sign in to comment.