Skip to content

Commit

Permalink
Upgrade to v0.3
Browse files Browse the repository at this point in the history
  • Loading branch information
EddieIvan01 committed Apr 25, 2020
1 parent b737535 commit 3860a83
Show file tree
Hide file tree
Showing 19 changed files with 3,243 additions and 34 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
v0.3:
1. Replace AES-CTR with XChaCha20. In Go, only AES-GCM has an
assembly-accelerated implementation, but Go's AES-GCM
implementation is not a stream cipher and incurs more
allocation overhead.
From the results of my test:
amd64: Xchacha20 is 4X faster than AES-CTR
i386: Xchacha20 is 3X faster than AES-CTR (8X slower than amd64)
2. Increase the TCP_BUFFER_SIZE to 0x8000
3. Fix a bug in UDP forward

v0.2.1:
1. Add heartbeat for remote-proxy's ctl-connection, to prevent
the NAT device drops mapping rules
Expand All @@ -9,5 +20,4 @@ v0.2:
v0.1.1:
1. Logic optimization, while both two connections are encrypted,
traffic will be forwarded without additional encryption and decryption

2. Made some little improvements
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ For example, we forward 3389 port in intranet to our VPS
./iox fwd -l *8888 -l 33890 -k 656565
```

It's easy to understand: traffic between be-controlled host and our VPS:8888 will be encrypted, the pre-shared secret key is 'AAA', `iox` will use it to generate seed key and IV, then encrypt with AES-CTR
It's easy to understand: traffic between the controlled host and our VPS:8888 will be encrypted. The pre-shared secret key is 'AAA'; `iox` uses it to generate the seed key and IV, then encrypts with XChaCha20 (AES-CTR was replaced with XChaCha20 in v0.3)

So, the `*` should be used in pairs

Expand Down
661 changes: 661 additions & 0 deletions crypto/chacha20/LICENSE

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions crypto/chacha20/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
### chacha20 - ChaCha20
#### Yawning Angel (yawning at schwanenlied dot me)

Yet another Go ChaCha20 implementation. Everything else I found was slow,
didn't support all the variants I need to use, or relied on cgo to go fast.

Features:

* 20 round, 256 bit key only. Everything else is pointless and stupid.
* IETF 96 bit nonce variant.
* XChaCha 24 byte nonce variant.
* SSSE3 and AVX2 support on amd64 targets.
* Incremental encrypt/decrypt support, unlike golang.org/x/crypto/salsa20.
289 changes: 289 additions & 0 deletions crypto/chacha20/chacha20.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
// Copyright (C) 2019 Yawning Angel
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

// Package chacha20 implements the ChaCha20 stream cipher.
package chacha20

import (
"crypto/cipher"
"encoding/binary"
"errors"
"iox/crypto/chacha20/internal/api"
"iox/crypto/chacha20/internal/hardware"
"iox/crypto/chacha20/internal/ref"
"math"
)

// Sizes, in bytes, of the key and of every nonce variant this package
// understands. The nonce sizes are listed from shortest to longest.
const (
	// KeySize is the length of a ChaCha20 key in bytes.
	KeySize = 32

	// NonceSize is the length of the ChaCha20 (8 byte) nonce in bytes.
	NonceSize = 8

	// INonceSize is the length of the IETF ChaCha20 nonce in bytes.
	INonceSize = 12

	// HNonceSize is the length of the HChaCha20 nonce in bytes.
	HNonceSize = 16

	// XNonceSize is the length of the XChaCha20 nonce in bytes.
	XNonceSize = 24
)

// Sentinel errors returned by this package.
var (
	// ErrInvalidKey is returned when a key is not exactly KeySize bytes long.
	ErrInvalidKey = errors.New("chacha20: key length must be KeySize bytes")

	// ErrInvalidNonce is returned when a nonce has a length other than
	// NonceSize, INonceSize or XNonceSize bytes.
	ErrInvalidNonce = errors.New("chacha20: nonce length must be NonceSize/INonceSize/XNonceSize bytes")

	// ErrInvalidCounter is returned when a requested block counter does not
	// fit the counter width of the cipher variant in use.
	ErrInvalidCounter = errors.New("chacha20: block counter is invalid (out of range)")
)

// Backend selection, populated once by init.
var (
	// supportedImpls lists every registered implementation, in the order
	// in which it was registered.
	supportedImpls []api.Implementation

	// activeImpl is the implementation that all Cipher operations use.
	activeImpl api.Implementation
)

// Compile-time check that *Cipher satisfies cipher.Stream.
var _ cipher.Stream = (*Cipher)(nil)

// Cipher is an instance of ChaCha20/XChaCha20 using a particular key and nonce.
//
// Instances are created with New and may be re-initialized in place with
// ReKey. Output is produced with XORKeyStream or KeyStream, both of which
// consume the same underlying keystream position.
type Cipher struct {
// state is the ChaCha20 input block as 32 bit words: words 0-3 hold the
// sigma constants, words 4-11 the key, word 12 the low half of the block
// counter, and words 13-15 either the upper counter half followed by an
// 8 byte nonce, or a 12 byte nonce when ietf is set.
state [api.StateSize]uint32
// buf holds one block of generated keystream that has not been fully
// consumed yet. While re-keying with an XChaCha20 nonce its first KeySize
// bytes are temporarily used as scratch space for the derived sub-key.
buf [api.BlockSize]byte

// off is the index into buf of the next unused keystream byte. A value of
// api.BlockSize means that buf holds no usable keystream.
off int
// ietf is true when the instance was keyed with an INonceSize byte nonce,
// in which case the block counter is limited to 32 bits.
ietf bool
}

// Reset wipes the cipher state (which includes the key) and the buffered
// keystream so that they no longer appear in the process's memory.
//
// The keystream offset and the nonce-variant flag are left untouched. The
// instance holds no key material afterwards; call ReKey before using it
// again.
func (c *Cipher) Reset() {
	c.state = [api.StateSize]uint32{}
	c.buf = [api.BlockSize]byte{}
}

// Seek sets the block counter to blockCounter.
//
// With an IETF (INonceSize) nonce the counter is only 32 bits wide, and
// ErrInvalidCounter is returned, leaving the cipher unchanged, if
// blockCounter does not fit. Otherwise the full 64 bit value is stored.
//
// On success any buffered keystream is discarded, so the next output byte is
// the first byte of the block that was sought to.
func (c *Cipher) Seek(blockCounter uint64) error {
	low, high := uint32(blockCounter), uint32(blockCounter>>32)

	switch {
	case !c.ietf:
		// 64 bit counter spread over two state words.
		c.state[12], c.state[13] = low, high
	case high != 0:
		// Word 13 belongs to the nonce in IETF mode; the counter must fit
		// in word 12 alone.
		return ErrInvalidCounter
	default:
		c.state[12] = low
	}

	// Mark the keystream buffer as exhausted.
	c.off = api.BlockSize
	return nil
}

// ReKey reinitializes the ChaCha20/XChaCha20 instance with the provided key
// and nonce.
//
// The previous state and buffered keystream are wiped before the new
// parameters are validated, so the old key is gone even when an error
// (ErrInvalidKey or ErrInvalidNonce) is returned.
func (c *Cipher) ReKey(key, nonce []byte) error {
	c.Reset()

	err := c.doReKey(key, nonce)
	return err
}

// doReKey validates key and nonce and loads them into the cipher state with
// the block counter set to zero.
//
// It returns ErrInvalidKey when key is not KeySize bytes long and
// ErrInvalidNonce when the nonce length is not one of NonceSize, INonceSize
// or XNonceSize. For an XNonceSize nonce a sub-key is first derived with
// HChaCha, and the cipher is keyed with that sub-key and the last 8 bytes of
// the nonce.
func (c *Cipher) doReKey(key, nonce []byte) error {
	if len(key) != KeySize {
		return ErrInvalidKey
	}

	// derived stays nil unless the XChaCha20 path is taken.
	var derived []byte
	switch len(nonce) {
	case XNonceSize:
		// Borrow the keystream buffer as scratch space for the sub-key; it
		// is wiped again before returning.
		derived = c.buf[:KeySize]
		activeImpl.HChaCha(key, nonce, derived)
		key, nonce = derived, nonce[16:24]
	case NonceSize, INonceSize:
		// Key and nonce are used as supplied.
	default:
		return ErrInvalidNonce
	}

	// Words 0-3: the ChaCha constant.
	c.state[0], c.state[1], c.state[2], c.state[3] = api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3

	// Words 4-11: the key as little-endian 32 bit words.
	for w := 0; w < KeySize/4; w++ {
		c.state[4+w] = binary.LittleEndian.Uint32(key[4*w:])
	}

	// Word 12: the (low half of the) block counter.
	c.state[12] = 0

	// Words 13-15: a 12 byte nonce in IETF mode, otherwise the upper counter
	// half followed by an 8 byte nonce.
	c.ietf = len(nonce) == INonceSize
	if c.ietf {
		c.state[13] = binary.LittleEndian.Uint32(nonce[0:])
		c.state[14] = binary.LittleEndian.Uint32(nonce[4:])
		c.state[15] = binary.LittleEndian.Uint32(nonce[8:])
	} else {
		c.state[13] = 0
		c.state[14] = binary.LittleEndian.Uint32(nonce[0:])
		c.state[15] = binary.LittleEndian.Uint32(nonce[4:])
	}

	// No keystream is buffered yet.
	c.off = api.BlockSize

	// Scrub the sub-key from the scratch buffer (no-op when derived is nil).
	for i := range derived {
		derived[i] = 0
	}

	return nil
}

// New returns a new ChaCha20/XChaCha20 instance keyed with key and nonce.
//
// The variant is chosen by the nonce length (NonceSize, INonceSize or
// XNonceSize). A nil Cipher is returned together with ErrInvalidKey or
// ErrInvalidNonce when either argument has an unsupported length.
func New(key, nonce []byte) (*Cipher, error) {
	c := new(Cipher)
	if err := c.doReKey(key, nonce); err != nil {
		return nil, err
	}
	return c, nil
}

// HChaCha is the HChaCha20 hash function used to make XChaCha. The 32 byte
// result is written to dst.
//
// key and nonce are handed to the active implementation without any length
// validation here.
// NOTE(review): presumably key must be KeySize bytes and nonce at least
// HNonceSize bytes - confirm against the implementations.
func HChaCha(key, nonce []byte, dst *[32]byte) {
	out := dst[:]
	activeImpl.HChaCha(key, nonce, out)
}

// XORKeyStream sets dst to the result of XORing src with the key stream. Dst
// and src may be the same slice but otherwise should not overlap.
//
// If dst is shorter than src, only len(dst) bytes are processed; the call
// does not panic in that case. The key stream position advances by the
// number of bytes processed, so consecutive calls continue where the
// previous one stopped.
//
// In IETF nonce mode the call panics (via doBlocks) once the 32 bit block
// counter limit would be exceeded.
func (c *Cipher) XORKeyStream(dst, src []byte) {
// Never write past the end of dst: silently truncate src instead.
if len(dst) < len(src) {
src = src[:len(dst)]
}

// remaining always equals len(src); dst and src are advanced in lock-step.
for remaining := len(src); remaining > 0; {
// Process multiple blocks at once.
//
// This is only possible when the internal buffer holds no unused
// keystream (off == BlockSize).
if c.off == api.BlockSize {
nrBlocks := remaining / api.BlockSize
directBytes := nrBlocks * api.BlockSize
if nrBlocks > 0 {
// XOR all whole blocks directly from src into dst.
c.doBlocks(dst, src, nrBlocks)
remaining -= directBytes
if remaining == 0 {
// Input was an exact multiple of the block size; the buffer
// stays empty.
return
}
dst = dst[directBytes:]
src = src[directBytes:]
}

// If there's a partial block, generate 1 block of keystream into
// the internal buffer.
c.doBlocks(c.buf[:], nil, 1)
c.off = 0
}

// Process partial blocks from the buffered keystream.
//
// toXor is the smaller of the unused buffered keystream and the input
// that is left.
toXor := api.BlockSize - c.off
if remaining < toXor {
toXor = remaining
}
if toXor > 0 {
// The inliner doesn't want to inline this function, but my
// attempts to force BCE don't seem to work with manual
// inlining.
//
// Taking the extra function call overhead here appears to be
// worth it.
//
// xorBufBytes also advances c.off by toXor.
c.xorBufBytes(dst, src, toXor)

dst = dst[toXor:]
src = src[toXor:]

remaining -= toXor
}
}
}

// xorBufBytes XORs the first n bytes of src with the next n unused bytes of
// buffered keystream, stores the result in dst and advances the keystream
// offset by n.
//
// n must be at least 1 and must not exceed the unused buffered keystream,
// len(dst) or len(src); otherwise the up-front index checks panic.
func (c *Cipher) xorBufBytes(dst, src []byte, n int) {
	keystream := c.buf[c.off:]

	// Touch the last element of each slice once so the compiler can drop
	// the per-iteration bounds checks.
	last := n - 1
	_ = keystream[last]
	_ = dst[last]
	_ = src[last]

	for i, k := range keystream[:n] {
		dst[i] = k ^ src[i]
	}

	c.off += n
}

// KeyStream fills dst with raw keystream, continuing from the current
// keystream position.
//
// In IETF nonce mode the call panics (via doBlocks) once the 32 bit block
// counter limit would be exceeded.
func (c *Cipher) KeyStream(dst []byte) {
	for len(dst) > 0 {
		if c.off == api.BlockSize {
			// The buffer is empty, so whole blocks can be generated straight
			// into dst.
			if whole := len(dst) / api.BlockSize; whole > 0 {
				c.doBlocks(dst, nil, whole)
				dst = dst[whole*api.BlockSize:]
				if len(dst) == 0 {
					return
				}
			}

			// A partial block is still wanted: refill the internal buffer
			// with one block of keystream.
			c.doBlocks(c.buf[:], nil, 1)
			c.off = 0
		}

		// Hand out buffered keystream; copy stops at whichever of dst or
		// the buffer runs out first.
		n := copy(dst, c.buf[c.off:])
		dst = dst[n:]
		c.off += n
	}
}

// doBlocks asks the active implementation to produce nrBlocks blocks of
// keystream from the current state. When src is non-nil the keystream is
// XORed with src into dst, otherwise dst receives the keystream itself.
//
// In IETF nonce mode it panics if the current block counter plus nrBlocks
// would exceed the 32 bit counter range.
func (c *Cipher) doBlocks(dst, src []byte, nrBlocks int) {
	if c.ietf && uint64(c.state[12])+uint64(nrBlocks) > math.MaxUint32 {
		panic("chacha20: will exceed key stream per nonce limit")
	}

	activeImpl.Blocks(&c.state, dst, src, nrBlocks)
}

func init() {
supportedImpls = hardware.Register(supportedImpls)
supportedImpls = ref.Register(supportedImpls)
activeImpl = supportedImpls[0]
}
59 changes: 59 additions & 0 deletions crypto/chacha20/internal/api/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (C) 2019 Yawning Angel
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

// Package api provides the ChaCha20 implementation abstract interface.
package api

// Sizes shared by all ChaCha20 implementations.
const (
	// BlockSize is the length of one ChaCha20 block in bytes.
	BlockSize = 64

	// StateSize is the number of 32 bit unsigned words in the ChaCha20 state.
	StateSize = 16

	// HashSize is the length of the HChaCha output in bytes.
	HashSize = 32

	// HNonceSize is the length of the HChaCha20 nonce in bytes.
	HNonceSize = 16
)

// The four words of the ChaCha constant. Serialized as little-endian bytes
// they read "expand 32-byte k".
const (
	// Sigma0 is the first word of the ChaCha constant.
	Sigma0 uint32 = 0x61707865

	// Sigma1 is the second word of the ChaCha constant.
	Sigma1 uint32 = 0x3320646e

	// Sigma2 is the third word of the ChaCha constant.
	Sigma2 uint32 = 0x79622d32

	// Sigma3 is the fourth word of the ChaCha constant.
	Sigma3 uint32 = 0x6b206574
)

// Implementation is a ChaCha20 implementation.
//
// The chacha20 package keeps a list of registered implementations and routes
// all block generation and HChaCha20 hashing through the selected one.
type Implementation interface {
// Name returns the name of the implementation.
Name() string

// Blocks calculates the ChaCha20 blocks. If src is not nil, dst will
// be set to the XOR of src with the key stream, otherwise dst will be
// set to the key stream.
//
// x is the StateSize-word cipher state and nrBlocks the number of
// BlockSize byte blocks to produce.
// NOTE(review): callers never advance the block counter themselves, so
// implementations presumably update it in x - confirm.
Blocks(x *[StateSize]uint32, dst, src []byte, nrBlocks int)

// HChaCha calculates the HChaCha20 hash.
//
// Note: `dst` is guaranteed to be HashSize bytes.
//
// NOTE(review): the chacha20 package passes the complete XChaCha20 nonce
// here, so implementations presumably read only the first HNonceSize
// bytes of nonce - confirm.
HChaCha(key, nonce []byte, dst []byte)
}
Loading

0 comments on commit 3860a83

Please sign in to comment.