-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added multiple fingerprint algorithms (#25)
- Loading branch information
Showing
5 changed files
with
425 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
// Package crc64 implements the Avro CRC-64 checksum. | ||
// See https://avro.apache.org/docs/current/spec.html#schema_fingerprints for information. | ||
package crc64 | ||
|
||
import ( | ||
"hash" | ||
"sync" | ||
) | ||
|
||
// Checksum constants.
const (
	// Size is the size of a CRC-64 checksum in bytes.
	Size = 8

	// Empty is the empty checksum: the value a digest holds before any data
	// has been written to it. makeTable also uses it as the XOR mask when
	// generating the lookup table.
	Empty = 0xc15d213aa4d7a795
)
|
||
// Table holds 256 precomputed 64-bit words, one for each possible byte value,
// so the checksum can be advanced a whole byte per lookup.
type Table [256]uint64
|
||
func makeTable() *Table { | ||
t := new(Table) | ||
for i := 0; i < 256; i++ { | ||
fp := uint64(i) | ||
for j := 0; j < 8; j++ { | ||
fp = (fp >> 1) ^ (Empty & -(fp & 1)) | ||
} | ||
t[i] = fp | ||
} | ||
return t | ||
} | ||
|
||
var ( | ||
tableBuildOnce sync.Once | ||
crc64Table *Table | ||
) | ||
|
||
func buildTableOnce() { | ||
tableBuildOnce.Do(buildTable) | ||
} | ||
|
||
func buildTable() { | ||
crc64Table = makeTable() | ||
} | ||
|
||
type digest struct { | ||
crc uint64 | ||
tab *Table | ||
} | ||
|
||
// New creates a new hash.Hash64 computing the Avro CRC-64 checksum. | ||
// Its Sum method will lay the value out in big-endian byte order. | ||
func New() hash.Hash64 { | ||
buildTableOnce() | ||
|
||
return &digest{ | ||
crc: Empty, | ||
tab: crc64Table, | ||
} | ||
} | ||
|
||
// Size returns the bytes size of the checksum. | ||
func (d *digest) Size() int { | ||
return Size | ||
} | ||
|
||
// BlockSize returns the block size of the checksum. | ||
func (d *digest) BlockSize() int { | ||
return 1 | ||
} | ||
|
||
// Reset resets the hash instance. | ||
func (d *digest) Reset() { | ||
d.crc = Empty | ||
} | ||
|
||
// Write accumulatively adds the given data to the checksum. | ||
func (d *digest) Write(p []byte) (n int, err error) { | ||
for i := 0; i < len(p); i++ { | ||
d.crc = (d.crc >> 8) ^ d.tab[(int)(byte(d.crc)^p[i])&0xff] | ||
} | ||
|
||
return len(p), nil | ||
} | ||
|
||
// Sum64 returns the checksum as a uint64. | ||
func (d *digest) Sum64() uint64 { | ||
return d.crc | ||
} | ||
|
||
// Sum returns the checksum as a byte slice, using the given byte slice. | ||
func (d *digest) Sum(in []byte) []byte { | ||
s := d.Sum64() | ||
return append(in, byte(s>>56), byte(s>>48), byte(s>>40), byte(s>>32), byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package crc64 | ||
|
||
import ( | ||
"strconv" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestGolden(t *testing.T) { | ||
tests := []struct { | ||
in string | ||
want uint64 | ||
}{ | ||
{ | ||
in: `"null"`, | ||
want: 7195948357588979594, | ||
}, | ||
{ | ||
in: `{"name":"foo","type":"fixed","size":15}`, | ||
want: 1756455273707447556, | ||
}, | ||
{ | ||
in: `{"name":"foo","type":"record","fields":[{"name":"f1","type":"boolean"}]}`, | ||
want: 7843277075252814651, | ||
}, | ||
} | ||
|
||
hash := New() | ||
|
||
for i, tt := range tests { | ||
t.Run(strconv.Itoa(i), func(t *testing.T) { | ||
hash.Reset() | ||
hash.Write([]byte(tt.in)) | ||
|
||
got := hash.Sum64() | ||
|
||
assert.Equal(t, tt.want, got) | ||
}) | ||
} | ||
} | ||
|
||
func TestGoldenBytes(t *testing.T) { | ||
tests := []struct { | ||
in string | ||
want []byte | ||
}{ | ||
{ | ||
in: `"null"`, | ||
want: []byte{0x63, 0xdd, 0x24, 0xe7, 0xcc, 0x25, 0x8f, 0x8a}, | ||
}, | ||
{ | ||
in: `{"name":"foo","type":"fixed","size":15}`, | ||
want: []byte{0x18, 0x60, 0x2e, 0xc3, 0xed, 0x31, 0xa5, 0x4}, | ||
}, | ||
{ | ||
in: `{"name":"foo","type":"record","fields":[{"name":"f1","type":"boolean"}]}`, | ||
want: []byte{0x6c, 0xd8, 0xea, 0xf1, 0xc9, 0x68, 0xa3, 0x3b}, | ||
}, | ||
} | ||
|
||
hash := New() | ||
|
||
for i, tt := range tests { | ||
t.Run(strconv.Itoa(i), func(t *testing.T) { | ||
hash.Reset() | ||
hash.Write([]byte(tt.in)) | ||
|
||
got := make([]byte, 0, hash.Size()) | ||
got = hash.Sum(got) | ||
|
||
assert.Equal(t, tt.want, got) | ||
}) | ||
} | ||
} | ||
|
||
func TestDigest_BlockSize(t *testing.T) { | ||
hash := New() | ||
|
||
assert.Equal(t, 1, hash.BlockSize()) | ||
} | ||
|
||
func bench(b *testing.B, size int64) { | ||
b.SetBytes(size) | ||
|
||
h := New() | ||
in := make([]byte, 0, h.Size()) | ||
|
||
data := make([]byte, size) | ||
for i := range data { | ||
data[i] = byte(i) | ||
} | ||
|
||
b.ReportAllocs() | ||
b.ResetTimer() | ||
for i := 0; i < b.N; i++ { | ||
h.Reset() | ||
_, _ = h.Write(data) | ||
h.Sum(in) | ||
|
||
in = in[:0] | ||
} | ||
} | ||
|
||
func BenchmarkCrc64(b *testing.B) { | ||
b.Run("64KB", func(b *testing.B) { | ||
bench(b, 64<<10) | ||
}) | ||
b.Run("4KB", func(b *testing.B) { | ||
bench(b, 4<<10) | ||
}) | ||
b.Run("1KB", func(b *testing.B) { | ||
bench(b, 1<<10) | ||
}) | ||
} |
Oops, something went wrong.