From 0ec363d9c894cf80e0438524ed522bffdb0cced5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Wed, 25 Jan 2023 16:19:03 +0000 Subject: [PATCH] avoid breaking intrinsics when obfuscating names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We obfuscate import paths as well as their declared names. The compiler treats some packages and APIs in special ways, and the way it detects those is by looking at import paths and names. In the past, we have avoided obfuscating some names like embed.FS or reflect.Value.MethodByName for this reason. Otherwise, go:embed or the linker's deadcode elimination might be broken. This matching by path and name also happens with compiler intrinsics. Intrinsics allow the compiler to rewrite some standard library calls with small and efficient assembly, depending on the target GOARCH. For example, math/bits.TrailingZeros32 gets replaced with ssa.OpCtz32, which on amd64 may result in using the TZCNTL instruction. We never noticed that we were breaking many of these intrinsics. The intrinsics for funcs declared in the runtime and its dependencies still worked properly, as we do not obfuscate those packages yet. However, for other packages like math/bits and sync/atomic, the intrinsics were being entirely disabled due to obfuscated names. Skipping intrinsics is particularly bad for performance, and it also leads to slightly larger binaries: │ old │ new │ │ bin-B │ bin-B vs base │ Build-16 5.450Mi ± ∞ ¹ 5.333Mi ± ∞ ¹ -2.15% (p=0.029 n=4) Finally, the main reason we noticed that intrinsics were broken is that apparently GOARCH=mips fails to link without them, as some symbols end up being not defined at all. This patch fixes builds for the MIPS family of architectures. Rather than building and linking all of std for every GOARCH, test that intrinsics work by asking the compiler to print which intrinsics are being applied, and checking that math/bits gets them. This fix is relatively unfortunate, as it means we stop obfuscating about 120 function names and a handful of package paths. However, fixing builds and intrinsics is much more important. We can figure out better ways to deal with intrinsics in the future. Fixes #646. --- go_std_tables.go | 145 ++++++++++++++++++++++++++++++- main.go | 20 +++-- scripts/gen-go-std-tables.sh | 20 ++++- shared.go | 3 + testdata/script/crossbuild.txtar | 13 ++- 5 files changed, 191 insertions(+), 10 deletions(-) diff --git a/go_std_tables.go b/go_std_tables.go index 4ab97072..3e45ebe1 100644 --- a/go_std_tables.go +++ b/go_std_tables.go @@ -1,6 +1,6 @@ // Code generated by scripts/gen-go-std-tables.sh; DO NOT EDIT. -// Generated from Go version devel go1.21-733ba92187 Tue Jan 24 13:28:54 2023 +0000. +// Generated from Go version devel go1.21-e216ee7e74 Wed Jan 25 03:23:11 2023 +0000. package main @@ -48,3 +48,146 @@ var runtimeLinknamed = []string{ "syscall/js", "time", } + +var compilerIntrinsicsPkgs = map[string]bool{ + "math": true, + "math/big": true, + "math/bits": true, + "runtime": true, + "runtime/internal/atomic": true, + "runtime/internal/math": true, + "runtime/internal/sys": true, + "sync": true, + "sync/atomic": true, +} + +var compilerIntrinsicsFuncs = map[string]bool{ + "math.Abs": true, + "math/big.mulWW": true, + "math/bits.Add": true, + "math/bits.Add64": true, + "math/bits.Div": true, + "math/bits.Div64": true, + "math/bits.Len": true, + "math/bits.Len16": true, + "math/bits.Len32": true, + "math/bits.Len64": true, + "math/bits.Len8": true, + "math/bits.Mul": true, + "math/bits.Mul64": true, + "math/bits.OnesCount": true, + "math/bits.OnesCount16": true, + "math/bits.OnesCount32": true, + "math/bits.OnesCount64": true, + "math/bits.OnesCount8": true, + "math/bits.Reverse": true, + "math/bits.Reverse16": true, + "math/bits.Reverse32": true, + "math/bits.Reverse64": true, + "math/bits.Reverse8": true, + "math/bits.ReverseBytes32": true, + "math/bits.ReverseBytes64": true, + "math/bits.RotateLeft": true, + "math/bits.RotateLeft16": true, + "math/bits.RotateLeft32": true, + "math/bits.RotateLeft64": true, + "math/bits.RotateLeft8": true, + "math/bits.Sub": true, + "math/bits.Sub64": true, + "math/bits.TrailingZeros16": true, + "math/bits.TrailingZeros32": true, + "math/bits.TrailingZeros64": true, + "math/bits.TrailingZeros8": true, + "math.Ceil": true, + "math.Copysign": true, + "math.Floor": true, + "math.FMA": true, + "math.Round": true, + "math.RoundToEven": true, + "math.sqrt": true, + "math.Trunc": true, + "runtime/internal/atomic.And": true, + "runtime/internal/atomic.And8": true, + "runtime/internal/atomic.Cas": true, + "runtime/internal/atomic.Cas64": true, + "runtime/internal/atomic.Casint32": true, + "runtime/internal/atomic.Casint64": true, + "runtime/internal/atomic.Casp1": true, + "runtime/internal/atomic.CasRel": true, + "runtime/internal/atomic.Casuintptr": true, + "runtime/internal/atomic.Load": true, + "runtime/internal/atomic.Load64": true, + "runtime/internal/atomic.Load8": true, + "runtime/internal/atomic.LoadAcq": true, + "runtime/internal/atomic.LoadAcq64": true, + "runtime/internal/atomic.LoadAcquintptr": true, + "runtime/internal/atomic.Loadint32": true, + "runtime/internal/atomic.Loadint64": true, + "runtime/internal/atomic.Loadp": true, + "runtime/internal/atomic.Loaduint": true, + "runtime/internal/atomic.Loaduintptr": true, + "runtime/internal/atomic.Or": true, + "runtime/internal/atomic.Or8": true, + "runtime/internal/atomic.Store": true, + "runtime/internal/atomic.Store64": true, + "runtime/internal/atomic.Store8": true, + "runtime/internal/atomic.Storeint32": true, + "runtime/internal/atomic.Storeint64": true, + "runtime/internal/atomic.StorepNoWB": true, + "runtime/internal/atomic.StoreRel": true, + "runtime/internal/atomic.StoreRel64": true, + "runtime/internal/atomic.StoreReluintptr": true, + "runtime/internal/atomic.Storeuintptr": true, + "runtime/internal/atomic.Xadd": true, + "runtime/internal/atomic.Xadd64": true, + "runtime/internal/atomic.Xaddint32": true, + "runtime/internal/atomic.Xaddint64": true, + "runtime/internal/atomic.Xadduintptr": true, + "runtime/internal/atomic.Xchg": true, + "runtime/internal/atomic.Xchg64": true, + "runtime/internal/atomic.Xchgint32": true, + "runtime/internal/atomic.Xchgint64": true, + "runtime/internal/atomic.Xchguintptr": true, + "runtime/internal/math.Mul64": true, + "runtime/internal/math.MulUintptr": true, + "runtime/internal/sys.Bswap32": true, + "runtime/internal/sys.Bswap64": true, + "runtime/internal/sys.Len64": true, + "runtime/internal/sys.Len8": true, + "runtime/internal/sys.OnesCount64": true, + "runtime/internal/sys.Prefetch": true, + "runtime/internal/sys.PrefetchStreamed": true, + "runtime/internal/sys.TrailingZeros32": true, + "runtime/internal/sys.TrailingZeros64": true, + "runtime/internal/sys.TrailingZeros8": true, + "runtime.mulUintptr": true, + "runtime.publicationBarrier": true, + "sync/atomic.AddInt32": true, + "sync/atomic.AddInt64": true, + "sync/atomic.AddUint32": true, + "sync/atomic.AddUint64": true, + "sync/atomic.AddUintptr": true, + "sync/atomic.CompareAndSwapInt32": true, + "sync/atomic.CompareAndSwapInt64": true, + "sync/atomic.CompareAndSwapUint32": true, + "sync/atomic.CompareAndSwapUint64": true, + "sync/atomic.CompareAndSwapUintptr": true, + "sync/atomic.LoadInt32": true, + "sync/atomic.LoadInt64": true, + "sync/atomic.LoadPointer": true, + "sync/atomic.LoadUint32": true, + "sync/atomic.LoadUint64": true, + "sync/atomic.LoadUintptr": true, + "sync/atomic.StoreInt32": true, + "sync/atomic.StoreInt64": true, + "sync/atomic.StoreUint32": true, + "sync/atomic.StoreUint64": true, + "sync/atomic.StoreUintptr": true, + "sync/atomic.SwapInt32": true, + "sync/atomic.SwapInt64": true, + "sync/atomic.SwapUint32": true, + "sync/atomic.SwapUint64": true, + "sync/atomic.SwapUintptr": true, + "sync.runtime_LoadAcquintptr": true, + "sync.runtime_StoreReluintptr": true, +} diff --git a/main.go b/main.go index c3d04e0f..cdc8f2f5 100644 --- a/main.go +++ b/main.go @@ -839,7 +839,7 @@ func replaceAsmNames(buf *bytes.Buffer, remaining []byte) { name := string(remaining[:nameEnd]) remaining = remaining[nameEnd:] - if lpkg.ToObfuscate { + if lpkg.ToObfuscate && !compilerIntrinsicsFuncs[lpkg.ImportPath+"."+name] { newName := hashWithPackage(lpkg, name) if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed log.Printf("asm name %q hashed with %x to %q", name, curPkg.GarbleActionID, newName) @@ -959,7 +959,11 @@ func transformCompile(args []string) ([]string, error) { } tf.handleDirectives(file.Comments) file = tf.transformGoFile(file) - if newPkgPath != "" { + // newPkgPath might be the original ImportPath in some edge cases like + // compilerIntrinsics; we don't want to use slashes in package names. + // TODO: when we do away with those edge cases, only check the string is + // non-empty. + if newPkgPath != "" && newPkgPath != curPkg.ImportPath { file.Name.Name = newPkgPath } @@ -1052,7 +1056,7 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) { func (tf *transformer) transformLinkname(localName, newName string) (string, string) { // obfuscate the local name, if the current package is obfuscated - if curPkg.ToObfuscate { + if curPkg.ToObfuscate && !compilerIntrinsicsFuncs[curPkg.ImportPath+"."+localName] { localName = hashWithPackage(curPkg, localName) } if newName == "" { @@ -1092,7 +1096,7 @@ func (tf *transformer) transformLinkname(localName, newName string) (string, str } panic(err) // shouldn't happen } - if lpkg.ToObfuscate { + if lpkg.ToObfuscate && !compilerIntrinsicsFuncs[lpkg.ImportPath+"."+foreignName] { // The name exists and was obfuscated; obfuscate the new name. newForeignName := hashWithPackage(lpkg, foreignName) newPkgPath := pkgPath @@ -1825,7 +1829,8 @@ func (tf *transformer) transformGoFile(file *ast.File) *ast.File { // The Go toolchain needs to detect symbols from these packages, // so we are not obfuscating their package paths or declared names. - switch pkg.Path() { + path := pkg.Path() + switch path { case "embed": // FS is detected by the compiler for //go:embed. return name == "FS" @@ -1849,7 +1854,6 @@ func (tf *transformer) transformGoFile(file *ast.File) *ast.File { return true } - path := pkg.Path() lpkg, err := listPackage(path) if err != nil { panic(err) // shouldn't happen @@ -1895,6 +1899,10 @@ func (tf *transformer) transformGoFile(file *ast.File) *ast.File { case *types.TypeName: debugName = "type" case *types.Func: + if compilerIntrinsicsFuncs[path+"."+name] { + return true + } + sign := obj.Type().(*types.Signature) if sign.Recv() == nil { debugName = "func" diff --git a/scripts/gen-go-std-tables.sh b/scripts/gen-go-std-tables.sh index 4b76daa2..1af7cd8b 100755 --- a/scripts/gen-go-std-tables.sh +++ b/scripts/gen-go-std-tables.sh @@ -3,6 +3,7 @@ # We can rewrite this bash script in Go if a dependency on bash and coreutils # is a problem during development. +goroot=$(go env GOROOT) go_version=$(go env GOVERSION) # not "go version", to exclude GOOS/GOARCH runtime_and_deps=$(go list -deps runtime) @@ -11,12 +12,17 @@ runtime_and_deps=$(go list -deps runtime) # This resulting list is what we need to "go list" when obfuscating the runtime, # as they are the packages that we may be missing. runtime_linknamed=$(comm -23 <( - sed -rn 's@//go:linkname .* ([^.]*)\.[^.]*@\1@p' $(go env GOROOT)/src/runtime/*.go | grep -vE '^main|^runtime\.' | sort -u + sed -rn 's@//go:linkname .* ([^.]*)\.[^.]*@\1@p' "${goroot}"/src/runtime/*.go | grep -vE '^main|^runtime\.' | sort -u ) <( # Note that we assume this is constant across platforms. go list -deps runtime | sort -u )) +compiler_intrinsics_table="$(sed -rn 's@.*\b(addF|alias)\("([^"]*)", "([^"]*)",.*@\2 \3@p' "${goroot}"/src/cmd/compile/internal/ssagen/ssa.go | sort -u)" +compiler_intrinsics_paths="$(while read path name; do + echo ${path} +done <<<"${compiler_intrinsics_table}" | sort -u)" + gofmt >go_std_tables.go <