diff --git a/jar/jar.go b/jar/jar.go index abfae35..9cd03fd 100644 --- a/jar/jar.go +++ b/jar/jar.go @@ -23,8 +23,11 @@ import ( "fmt" "io" "io/fs" + "os" "path" "strings" + + zipfork "github.com/google/log4jscanner/third_party/zip" ) const ( @@ -68,6 +71,72 @@ func Parse(r *zip.Reader) (*Report, error) { }, nil } +// ReadCloser mirrors zip.ReadCloser. +type ReadCloser struct { + zip.Reader + + f *os.File +} + +// Close closes the underlying file. +func (r *ReadCloser) Close() error { + return r.f.Close() +} + +// OpenReader mirrors zip.OpenReader, loading a JAR from a file, but supports +// self-executable JARs. See NewReader() for details. +func OpenReader(path string) (r *ReadCloser, offset int64, err error) { + f, err := os.Open(path) + if err != nil { + return + } + info, err := f.Stat() + if err != nil { + f.Close() + return + } + zr, offset, err := NewReader(f, info.Size()) + if err != nil { + f.Close() + return + } + return &ReadCloser{*zr, f}, offset, nil +} + +// offsetReader is a io.ReaderAt that starts at some offset from the start of +// the file. +type offsetReader struct { + ra io.ReaderAt + offset int64 +} + +func (o offsetReader) ReadAt(p []byte, off int64) (n int, err error) { + return o.ra.ReadAt(p, off+o.offset) +} + +// NewReader is a wrapper around zip.NewReader that supports self-executable +// JARs. JAR files with prefixed data, such as a bash script to allow them to +// run directly. +// +// If the ZIP contains a prefix, the returned offset indicates the size of the +// prefix. +// +// See: +// - https://kevinboone.me/execjava.html +// - https://github.com/golang/go/issues/10464 +func NewReader(ra io.ReaderAt, size int64) (zr *zip.Reader, offset int64, err error) { + zr, err = zip.NewReader(ra, size) + if err == nil || !errors.Is(err, zip.ErrFormat) { + return zr, 0, err + } + offset, err = zipfork.ReadZIPOffset(ra, size) + if err != nil { + return nil, 0, err + } + zr, err = zip.NewReader(offsetReader{ra, offset}, size-offset) + return zr, offset, err +} + type checker struct { // Does the JAR contain the JNDI lookup class? hasLookupClass bool diff --git a/jar/jar_test.go b/jar/jar_test.go index 0da9f3a..6ccbf34 100644 --- a/jar/jar_test.go +++ b/jar/jar_test.go @@ -15,7 +15,6 @@ package jar import ( - "archive/zip" "path/filepath" "testing" ) @@ -51,6 +50,7 @@ func TestParse(t *testing.T) { // Test case where it contains a JndiLookupOther.class file that shouldn't be detected as vulnerable {"similarbutnotvuln.jar", false}, {"vuln-class.jar", true}, + {"vuln-class-executable", true}, {"vuln-class.jar.patched", false}, {"good_jar_in_jar.jar", false}, {"good_jar_in_jar_in_jar.jar", false}, @@ -61,11 +61,14 @@ func TestParse(t *testing.T) { {"bad_jar_with_invalid_jar.jar", true}, {"bad_jar_with_invalid_jar.jar.patched", false}, {"good_jar_with_invalid_jar.jar", false}, + {"helloworld-executable", false}, + {"helloworld.jar", false}, + {"helloworld.signed.jar", false}, } for _, tc := range testCases { t.Run(tc.filename, func(t *testing.T) { p := testdataPath(tc.filename) - zr, err := zip.OpenReader(p) + zr, _, err := OpenReader(p) if err != nil { t.Fatalf("zip.OpenReader failed: %v", err) } @@ -85,7 +88,7 @@ func TestParse(t *testing.T) { func BenchmarkParse(b *testing.B) { filename := "safe1.jar" p := testdataPath(filename) - zr, err := zip.OpenReader(p) + zr, _, err := OpenReader(p) if err != nil { b.Fatalf("zip.OpenReader failed: %v", err) } diff --git a/jar/rewrite.go b/jar/rewrite.go index 328fcc3..4a016bb 100644 --- a/jar/rewrite.go +++ b/jar/rewrite.go @@ -33,7 +33,50 @@ var skipSuffixes = [...]string{ ".SF", } +// RewriteJAR is like Rewrite but accounts for self-executable JARs, copying +// any prefixed data that may be included in the JAR. +func RewriteJAR(dest io.Writer, src io.ReaderAt, size int64) error { + zr, offset, err := NewReader(src, size) + if err != nil { + return err + } + + if offset > 0 { + src := io.NewSectionReader(src, 0, offset) + if _, err := io.CopyN(dest, src, offset); err != nil { + return err + } + } + return Rewrite(dest, zr) +} + // Rewrite attempts to remove any JndiLookup.class files from a JAR. +// +// Rewrite does not account for self-executable JARs and does not preserve the +// file prefix. This must be explicitly handled, or use RewriteJAR() to do so +// automatically. +// +// zr, offset, err := jar.NewReader(ra, size) +// if err != nil { +// // ... +// } +// dest, err := os.CreateTemp("", "") +// if err != nil { +// // ... +// } +// defer dest.Close() +// +// if offset > 0 { +// // Rewrite prefix. +// src := io.NewSectionReader(ra, 0, offset) +// if _, err := io.CopyN(dest, src, offset); err != nil { +// // ... +// } +// } +// if err := jar.Rewrite(dest, zr); err != nil { +// // ... +// } +// func Rewrite(w io.Writer, zr *zip.Reader) error { zw := zip.NewWriter(w) for _, zipItem := range zr.File { diff --git a/jar/rewrite_test.go b/jar/rewrite_test.go index 85dd163..04e8bb5 100644 --- a/jar/rewrite_test.go +++ b/jar/rewrite_test.go @@ -57,17 +57,22 @@ func cpFile(t *testing.T, dest, src string) { } func autoMitigateJAR(path string) error { - r, err := zip.OpenReader(path) + r, err := os.Open(path) if err != nil { - return fmt.Errorf("open reader: %v", err) + return fmt.Errorf("open flie: %v", err) } defer r.Close() + info, err := r.Stat() + if err != nil { + return fmt.Errorf("stat file: %v", err) + } + f, err := os.CreateTemp("", "") if err != nil { return fmt.Errorf("create temp: %v", err) } defer f.Close() - if err := Rewrite(f, &r.Reader); err != nil { + if err := RewriteJAR(f, r, info.Size()); err != nil { return fmt.Errorf("rewriting zip: %v", err) } @@ -173,6 +178,100 @@ func TestAutoMitigateJAR(t *testing.T) { "bad_jar_in_jar_in_jar.jar", "bad_jar_with_invalid_jar.jar", "vuln-class.jar", + "vuln-class-executable", + } { + tc := tc + t.Run(tc, func(t *testing.T) { + t.Parallel() + src := testdataPath(tc) + dest := filepath.Join(t.TempDir(), tc) + + cpFile(t, dest, src) + + if err := autoMitigateJAR(dest); err != nil { + t.Fatalf("autoMitigateJar(%s) failed: %v", dest, err) + } + + before, _, err := OpenReader(src) + if err != nil { + t.Fatalf("zip.OpenReader(%q) failed: %v", src, err) + } + defer before.Close() + after, _, err := OpenReader(dest) + if err != nil { + t.Fatalf("zip.OpenReader(%q) failed: %v", dest, err) + } + defer after.Close() + checkJARs(t, func(name string) bool { + return path.Base(name) == "JndiLookup.class" + }, &before.Reader, &after.Reader) + }) + } +} + +func TestAutoMitigateExecutable(t *testing.T) { + for _, tc := range []string{ + "helloworld-executable", + "vuln-class-executable", + } { + tc := tc + t.Run(tc, func(t *testing.T) { + t.Parallel() + src := testdataPath(tc) + dest := filepath.Join(t.TempDir(), tc) + + cpFile(t, dest, src) + + if err := autoMitigateJAR(dest); err != nil { + t.Fatalf("autoMitigateJar(%s) failed: %v", dest, err) + } + + sf, err := os.Open(src) + if err != nil { + t.Fatalf("open file %s: %v", src, err) + } + defer sf.Close() + info, err := sf.Stat() + if err != nil { + t.Fatalf("stat file %s: %v", src, err) + } + + _, offset, err := NewReader(sf, info.Size()) + if err != nil { + t.Fatalf("new jar reader %s: %v", src, err) + } + if offset <= 0 { + t.Errorf("expected offset for executable %s: got=%d", src, offset) + } + + df, err := os.Open(dest) + if err != nil { + t.Fatalf("open file %s: %v", dest, err) + } + defer df.Close() + + got := make([]byte, offset) + want := make([]byte, offset) + if _, err := io.ReadFull(sf, want); err != nil { + t.Fatalf("reading prefix from file %s: %v", src, err) + } + if _, err := io.ReadFull(df, got); err != nil { + t.Fatalf("reading prefix from file %s: %v", dest, err) + } + if !bytes.Equal(got, want) { + t.Errorf("prefix did not match after rewrite, got=%q, want=%q", got, want) + } + }) + } +} +func TestAutoMitigate(t *testing.T) { + for _, tc := range []string{ + "arara.jar", + "bad_jar_in_jar.jar", + "bad_jar_in_jar_in_jar.jar", + "bad_jar_with_invalid_jar.jar", + "vuln-class.jar", + "vuln-class-executable", } { tc := tc t.Run(tc, func(t *testing.T) { @@ -186,12 +285,12 @@ func TestAutoMitigateJAR(t *testing.T) { t.Fatalf("autoMitigateJar(%s) failed: %v", dest, err) } - before, err := zip.OpenReader(src) + before, _, err := OpenReader(src) if err != nil { t.Fatalf("zip.OpenReader(%q) failed: %v", src, err) } defer before.Close() - after, err := zip.OpenReader(dest) + after, _, err := OpenReader(dest) if err != nil { t.Fatalf("zip.OpenReader(%q) failed: %v", dest, err) } @@ -220,12 +319,12 @@ func TestAutoMitigateSignedJAR(t *testing.T) { t.Fatalf("autoMitigateJar(%s) failed: %v", dest, err) } - before, err := zip.OpenReader(src) + before, _, err := OpenReader(src) if err != nil { t.Fatalf("zip.OpenReader(%q) failed: %v", src, err) } defer before.Close() - after, err := zip.OpenReader(dest) + after, _, err := OpenReader(dest) if err != nil { t.Fatalf("zip.OpenReader(%q) failed: %v", dest, err) } diff --git a/jar/testdata/generate.sh b/jar/testdata/generate.sh new file mode 100755 index 0000000..f371932 --- /dev/null +++ b/jar/testdata/generate.sh @@ -0,0 +1,29 @@ +#!/bin/bash -e + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +echo '#!/bin/bash +file_path=`realpath $0` +exec java -jar $file_path "$@" +' > helloworld-executable +cat helloworld.jar >> helloworld-executable +chmod +x helloworld-executable + +echo '#!/bin/bash +file_path=`realpath $0` +exec java -jar $file_path "$@" +' > vuln-class-executable +cat vuln-class.jar >> vuln-class-executable +chmod +x vuln-class-executable diff --git a/jar/testdata/helloworld-executable b/jar/testdata/helloworld-executable new file mode 100755 index 0000000..ae601cf Binary files /dev/null and b/jar/testdata/helloworld-executable differ diff --git a/jar/testdata/vuln-class-executable b/jar/testdata/vuln-class-executable new file mode 100755 index 0000000..5aa6bba Binary files /dev/null and b/jar/testdata/vuln-class-executable differ diff --git a/jar/walker.go b/jar/walker.go index 14c043c..8010bcd 100644 --- a/jar/walker.go +++ b/jar/walker.go @@ -143,7 +143,7 @@ func (w *walker) visit(p string, d fs.DirEntry) error { if !ok { return fmt.Errorf("file doesn't implement reader at: %T", f) } - zr, err := zip.NewReader(ra, info.Size()) + zr, _, err := NewReader(ra, info.Size()) if err != nil { if err == zip.ErrFormat { // Not a JAR. @@ -174,7 +174,7 @@ func (w *walker) visit(p string, d fs.DirEntry) error { } defer tf.Close() - if err := Rewrite(tf, zr); err != nil { + if err := RewriteJAR(tf, ra, info.Size()); err != nil { return fmt.Errorf("failed to rewrite %s: %v", p, err) } f.Close() diff --git a/third_party/zip/LICENSE b/third_party/zip/LICENSE new file mode 100644 index 0000000..6a66aea --- /dev/null +++ b/third_party/zip/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/zip/zip.go b/third_party/zip/zip.go new file mode 100644 index 0000000..be96352 --- /dev/null +++ b/third_party/zip/zip.go @@ -0,0 +1,187 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package zip contains a fork of archive/zip reader. +// +// This package copies logic from archive/zip to read the central directory file +// header from the end of a ZIP file. The only modification is that +// readDirectoryEnd() is exported as ReadZIPOffset(), returning the position of the +// start of the ZIP contents. +// +// https://github.com/golang/go/blob/go1.17.5/src/archive/zip/reader.go +// https://github.com/golang/go/blob/go1.17.5/src/archive/zip/struct.go +// +// See https://go.dev/issues/10464 +// +// This package MUST NOT be depended on by external code and may change at any +// time. This is not an "internal" package only because we're required to place +// external code under a top level "third_party/" directory. +package zip + +import ( + "archive/zip" + "encoding/binary" + "errors" + "io" +) + +type readBuf []byte + +func (b *readBuf) uint8() uint8 { + v := (*b)[0] + *b = (*b)[1:] + return v +} + +func (b *readBuf) uint16() uint16 { + v := binary.LittleEndian.Uint16(*b) + *b = (*b)[2:] + return v +} + +func (b *readBuf) uint32() uint32 { + v := binary.LittleEndian.Uint32(*b) + *b = (*b)[4:] + return v +} + +func (b *readBuf) uint64() uint64 { + v := binary.LittleEndian.Uint64(*b) + *b = (*b)[8:] + return v +} + +func (b *readBuf) sub(n int) readBuf { + b2 := (*b)[:n] + *b = (*b)[n:] + return b2 +} + +const ( + directory64LocSignature = 0x07064b50 + directory64EndSignature = 0x06064b50 + + directoryEndLen = 22 // + comment + directory64LocLen = 20 // + directory64EndLen = 56 // + extra + +) + +type directoryEnd struct { + diskNbr uint32 // unused + dirDiskNbr uint32 // unused + dirRecordsThisDisk uint64 // unused + directoryRecords uint64 + directorySize uint64 + directoryOffset uint64 // relative to file + commentLen uint16 + comment string +} + +// ReadZIPOffset attempts to determine where a ZIP file starts, supporting +// self-executing JARs. JARs concatenated with a bash script. +func ReadZIPOffset(r io.ReaderAt, size int64) (offset int64, err error) { + // look for directoryEndSignature in the last 1k, then in the last 65k + var buf []byte + var directoryEndOffset int64 + for i, bLen := range []int64{1024, 65 * 1024} { + if bLen > size { + bLen = size + } + buf = make([]byte, int(bLen)) + if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { + return 0, err + } + if p := findSignatureInBlock(buf); p >= 0 { + buf = buf[p:] + directoryEndOffset = size - bLen + int64(p) + break + } + if i == 1 || bLen == size { + return 0, zip.ErrFormat + } + } + + // read header into struct + b := readBuf(buf[4:]) // skip signature + d := &directoryEnd{ + diskNbr: uint32(b.uint16()), + dirDiskNbr: uint32(b.uint16()), + dirRecordsThisDisk: uint64(b.uint16()), + directoryRecords: uint64(b.uint16()), + directorySize: uint64(b.uint32()), + directoryOffset: uint64(b.uint32()), + commentLen: b.uint16(), + } + l := int(d.commentLen) + if l > len(b) { + return 0, errors.New("zip: invalid comment length") + } + d.comment = string(b[:l]) + return directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset), nil +} + +func findSignatureInBlock(b []byte) int { + for i := len(b) - directoryEndLen; i >= 0; i-- { + // defined from directoryEndSignature in struct.go + if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { + // n is length of comment + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if n+directoryEndLen+i <= len(b) { + return i + } + } + } + return -1 +} + +// readDirectory64End reads the zip64 directory end and updates the +// directory end with the zip64 directory end values. +func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { + buf := make([]byte, directory64EndLen) + if _, err := r.ReadAt(buf, offset); err != nil { + return err + } + + b := readBuf(buf) + if sig := b.uint32(); sig != directory64EndSignature { + return zip.ErrFormat + } + + b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) + d.diskNbr = b.uint32() // number of this disk + d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory + d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk + d.directoryRecords = b.uint64() // total number of entries in the central directory + d.directorySize = b.uint64() // size of the central directory + d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number + + return nil +} + +// findDirectory64End tries to read the zip64 locator just before the +// directory end and returns the offset of the zip64 directory end if +// found. +func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { + locOffset := directoryEndOffset - directory64LocLen + if locOffset < 0 { + return -1, nil // no need to look for a header outside the file + } + buf := make([]byte, directory64LocLen) + if _, err := r.ReadAt(buf, locOffset); err != nil { + return -1, err + } + b := readBuf(buf) + if sig := b.uint32(); sig != directory64LocSignature { + return -1, nil + } + if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory + return -1, nil // the file is not a valid zip64-file + } + p := b.uint64() // relative offset of the zip64 end of central directory record + if b.uint32() != 1 { // total number of disks + return -1, nil // the file is not a valid zip64-file + } + return int64(p), nil +}