Skip to content

Commit

Permalink
build: support frame-pointer for arm64
Browse files Browse the repository at this point in the history
Supporting frame-pointer makes Linux's perf and other profilers much more useful
because it lets them gather a stack trace efficiently on profiling events. Major
changes include:
1. save FP on the word below where RSP is pointing to (proposed by Cherry and Austin)
2. adjust some specific offsets in runtime assembly and wrapper code
3. add support to FP in goroutine scheduler
4. adjust link stack overflow check to take the extra word into account
5. adjust nosplit test cases to enable frame sizes which are 16 bytes aligned

Performance impacts on go1 benchmarks:

Enable frame-pointer (by default)

name                      old time/op    new time/op    delta
BinaryTree17-46              5.94s ± 0%     6.00s ± 0%  +1.03%  (p=0.029 n=4+4)
Fannkuch11-46                2.84s ± 1%     2.77s ± 0%  -2.58%  (p=0.008 n=5+5)
FmtFprintfEmpty-46          55.0ns ± 1%    58.9ns ± 1%  +7.06%  (p=0.008 n=5+5)
FmtFprintfString-46          102ns ± 0%     105ns ± 0%  +2.94%  (p=0.008 n=5+5)
FmtFprintfInt-46             118ns ± 0%     117ns ± 1%  -1.19%  (p=0.000 n=4+5)
FmtFprintfIntInt-46          181ns ± 0%     182ns ± 1%    ~     (p=0.444 n=5+5)
FmtFprintfPrefixedInt-46     215ns ± 1%     214ns ± 0%    ~     (p=0.254 n=5+4)
FmtFprintfFloat-46           292ns ± 0%     296ns ± 0%  +1.46%  (p=0.029 n=4+4)
FmtManyArgs-46               720ns ± 0%     732ns ± 0%  +1.72%  (p=0.008 n=5+5)
GobDecode-46                9.82ms ± 1%   10.03ms ± 2%  +2.10%  (p=0.008 n=5+5)
GobEncode-46                8.14ms ± 0%    8.72ms ± 1%  +7.14%  (p=0.008 n=5+5)
Gzip-46                      420ms ± 0%     424ms ± 0%  +0.92%  (p=0.008 n=5+5)
Gunzip-46                   48.2ms ± 0%    48.4ms ± 0%  +0.41%  (p=0.008 n=5+5)
HTTPClientServer-46          201µs ± 4%     201µs ± 0%    ~     (p=0.730 n=5+4)
JSONEncode-46               17.1ms ± 0%    17.7ms ± 1%  +3.80%  (p=0.008 n=5+5)
JSONDecode-46               88.0ms ± 0%    90.1ms ± 0%  +2.42%  (p=0.008 n=5+5)
Mandelbrot200-46            5.06ms ± 0%    5.07ms ± 0%    ~     (p=0.310 n=5+5)
GoParse-46                  5.04ms ± 0%    5.12ms ± 0%  +1.53%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46       117ns ± 0%     117ns ± 0%    ~     (all equal)
RegexpMatchEasy0_1K-46       332ns ± 0%     329ns ± 0%  -0.78%  (p=0.008 n=5+5)
RegexpMatchEasy1_32-46       104ns ± 0%     113ns ± 0%  +8.65%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46       563ns ± 0%     569ns ± 0%  +1.10%  (p=0.008 n=5+5)
RegexpMatchMedium_32-46      167ns ± 2%     177ns ± 1%  +5.74%  (p=0.008 n=5+5)
RegexpMatchMedium_1K-46     49.5µs ± 0%    53.4µs ± 0%  +7.81%  (p=0.008 n=5+5)
RegexpMatchHard_32-46       2.56µs ± 1%    2.72µs ± 0%  +6.01%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46       77.0µs ± 0%    81.8µs ± 0%  +6.24%  (p=0.016 n=5+4)
Revcomp-46                   631ms ± 1%     627ms ± 1%    ~     (p=0.095 n=5+5)
Template-46                 81.8ms ± 0%    86.3ms ± 0%  +5.55%  (p=0.008 n=5+5)
TimeParse-46                 423ns ± 0%     432ns ± 0%  +2.32%  (p=0.008 n=5+5)
TimeFormat-46                478ns ± 2%     497ns ± 1%  +3.89%  (p=0.008 n=5+5)
[Geo mean]                  71.6µs         73.3µs       +2.45%

name                      old speed      new speed      delta
GobDecode-46              78.1MB/s ± 1%  76.6MB/s ± 2%  -2.04%  (p=0.008 n=5+5)
GobEncode-46              94.3MB/s ± 0%  88.0MB/s ± 1%  -6.67%  (p=0.008 n=5+5)
Gzip-46                   46.2MB/s ± 0%  45.8MB/s ± 0%  -0.91%  (p=0.008 n=5+5)
Gunzip-46                  403MB/s ± 0%   401MB/s ± 0%  -0.41%  (p=0.008 n=5+5)
JSONEncode-46              114MB/s ± 0%   109MB/s ± 1%  -3.66%  (p=0.008 n=5+5)
JSONDecode-46             22.0MB/s ± 0%  21.5MB/s ± 0%  -2.35%  (p=0.008 n=5+5)
GoParse-46                11.5MB/s ± 0%  11.3MB/s ± 0%  -1.51%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46     272MB/s ± 0%   272MB/s ± 1%    ~     (p=0.190 n=4+5)
RegexpMatchEasy0_1K-46    3.08GB/s ± 0%  3.11GB/s ± 0%  +0.77%  (p=0.008 n=5+5)
RegexpMatchEasy1_32-46     306MB/s ± 0%   283MB/s ± 0%  -7.63%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46    1.82GB/s ± 0%  1.80GB/s ± 0%  -1.07%  (p=0.008 n=5+5)
RegexpMatchMedium_32-46   5.99MB/s ± 0%  5.64MB/s ± 1%  -5.77%  (p=0.016 n=4+5)
RegexpMatchMedium_1K-46   20.7MB/s ± 0%  19.2MB/s ± 0%  -7.25%  (p=0.008 n=5+5)
RegexpMatchHard_32-46     12.5MB/s ± 1%  11.8MB/s ± 0%  -5.66%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46     13.3MB/s ± 0%  12.5MB/s ± 1%  -6.01%  (p=0.008 n=5+5)
Revcomp-46                 402MB/s ± 1%   405MB/s ± 1%    ~     (p=0.095 n=5+5)
Template-46               23.7MB/s ± 0%  22.5MB/s ± 0%  -5.25%  (p=0.008 n=5+5)
[Geo mean]                82.2MB/s       79.6MB/s       -3.26%

Disable frame-pointer (GOEXPERIMENT=noframepointer)

name                      old time/op    new time/op    delta
BinaryTree17-46              5.94s ± 0%     5.96s ± 0%  +0.39%  (p=0.029 n=4+4)
Fannkuch11-46                2.84s ± 1%     2.79s ± 1%  -1.68%  (p=0.008 n=5+5)
FmtFprintfEmpty-46          55.0ns ± 1%    55.2ns ± 3%    ~     (p=0.794 n=5+5)
FmtFprintfString-46          102ns ± 0%     103ns ± 0%  +0.98%  (p=0.016 n=5+4)
FmtFprintfInt-46             118ns ± 0%     115ns ± 0%  -2.54%  (p=0.029 n=4+4)
FmtFprintfIntInt-46          181ns ± 0%     179ns ± 0%  -1.10%  (p=0.000 n=5+4)
FmtFprintfPrefixedInt-46     215ns ± 1%     213ns ± 0%    ~     (p=0.143 n=5+4)
FmtFprintfFloat-46           292ns ± 0%     300ns ± 0%  +2.83%  (p=0.029 n=4+4)
FmtManyArgs-46               720ns ± 0%     739ns ± 0%  +2.64%  (p=0.008 n=5+5)
GobDecode-46                9.82ms ± 1%    9.78ms ± 1%    ~     (p=0.151 n=5+5)
GobEncode-46                8.14ms ± 0%    8.12ms ± 1%    ~     (p=0.690 n=5+5)
Gzip-46                      420ms ± 0%     420ms ± 0%    ~     (p=0.548 n=5+5)
Gunzip-46                   48.2ms ± 0%    48.0ms ± 0%  -0.33%  (p=0.032 n=5+5)
HTTPClientServer-46          201µs ± 4%     199µs ± 3%    ~     (p=0.548 n=5+5)
JSONEncode-46               17.1ms ± 0%    17.2ms ± 0%    ~     (p=0.056 n=5+5)
JSONDecode-46               88.0ms ± 0%    88.6ms ± 0%  +0.64%  (p=0.008 n=5+5)
Mandelbrot200-46            5.06ms ± 0%    5.07ms ± 0%    ~     (p=0.548 n=5+5)
GoParse-46                  5.04ms ± 0%    5.07ms ± 0%  +0.65%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46       117ns ± 0%     112ns ± 4%  -4.27%  (p=0.016 n=4+5)
RegexpMatchEasy0_1K-46       332ns ± 0%     330ns ± 1%    ~     (p=0.095 n=5+5)
RegexpMatchEasy1_32-46       104ns ± 0%     110ns ± 1%  +5.29%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46       563ns ± 0%     567ns ± 2%    ~     (p=0.151 n=5+5)
RegexpMatchMedium_32-46      167ns ± 2%     166ns ± 0%    ~     (p=0.333 n=5+4)
RegexpMatchMedium_1K-46     49.5µs ± 0%    49.6µs ± 0%    ~     (p=0.841 n=5+5)
RegexpMatchHard_32-46       2.56µs ± 1%    2.49µs ± 0%  -2.81%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46       77.0µs ± 0%    75.8µs ± 0%  -1.55%  (p=0.008 n=5+5)
Revcomp-46                   631ms ± 1%     628ms ± 0%    ~     (p=0.095 n=5+5)
Template-46                 81.8ms ± 0%    84.3ms ± 1%  +3.05%  (p=0.008 n=5+5)
TimeParse-46                 423ns ± 0%     425ns ± 0%  +0.52%  (p=0.008 n=5+5)
TimeFormat-46                478ns ± 2%     478ns ± 1%    ~     (p=1.000 n=5+5)
[Geo mean]                  71.6µs         71.6µs       -0.01%

name                      old speed      new speed      delta
GobDecode-46              78.1MB/s ± 1%  78.5MB/s ± 1%    ~     (p=0.151 n=5+5)
GobEncode-46              94.3MB/s ± 0%  94.5MB/s ± 1%    ~     (p=0.690 n=5+5)
Gzip-46                   46.2MB/s ± 0%  46.2MB/s ± 0%    ~     (p=0.571 n=5+5)
Gunzip-46                  403MB/s ± 0%   404MB/s ± 0%  +0.33%  (p=0.032 n=5+5)
JSONEncode-46              114MB/s ± 0%   113MB/s ± 0%    ~     (p=0.056 n=5+5)
JSONDecode-46             22.0MB/s ± 0%  21.9MB/s ± 0%  -0.64%  (p=0.008 n=5+5)
GoParse-46                11.5MB/s ± 0%  11.4MB/s ± 0%  -0.64%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-46     272MB/s ± 0%   285MB/s ± 4%  +4.74%  (p=0.016 n=4+5)
RegexpMatchEasy0_1K-46    3.08GB/s ± 0%  3.10GB/s ± 1%    ~     (p=0.151 n=5+5)
RegexpMatchEasy1_32-46     306MB/s ± 0%   290MB/s ± 1%  -5.21%  (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46    1.82GB/s ± 0%  1.81GB/s ± 2%    ~     (p=0.151 n=5+5)
RegexpMatchMedium_32-46   5.99MB/s ± 0%  6.02MB/s ± 1%    ~     (p=0.063 n=4+5)
RegexpMatchMedium_1K-46   20.7MB/s ± 0%  20.7MB/s ± 0%    ~     (p=0.659 n=5+5)
RegexpMatchHard_32-46     12.5MB/s ± 1%  12.8MB/s ± 0%  +2.88%  (p=0.008 n=5+5)
RegexpMatchHard_1K-46     13.3MB/s ± 0%  13.5MB/s ± 0%  +1.58%  (p=0.008 n=5+5)
Revcomp-46                 402MB/s ± 1%   405MB/s ± 0%    ~     (p=0.095 n=5+5)
Template-46               23.7MB/s ± 0%  23.0MB/s ± 1%  -2.95%  (p=0.008 n=5+5)
[Geo mean]                82.2MB/s       82.3MB/s       +0.04%

Frame-pointer is enabled on Linux by default but can be disabled by setting: GOEXPERIMENT=noframepointer.

Fixes #10110

Change-Id: I1bfaca6dba29a63009d7c6ab04ed7a1413d9479e
Reviewed-on: https://go-review.googlesource.com/61511
Reviewed-by: Cherry Zhang <[email protected]>
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
  • Loading branch information
Zheng-Xu authored and cherrymui committed Aug 29, 2018
1 parent 7b88b22 commit 8f4fd3f
Show file tree
Hide file tree
Showing 14 changed files with 243 additions and 57 deletions.
8 changes: 4 additions & 4 deletions src/cmd/compile/internal/arm64/ggen.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ import (
var darwin = objabi.GOOS == "darwin"

func padframe(frame int64) int64 {
// arm64 requires that the frame size (not counting saved LR)
// be empty or be 8 mod 16. If not, pad it.
if frame != 0 && frame%16 != 8 {
frame += 8
// arm64 requires that the frame size (not counting saved FP&LR)
// be 16 bytes aligned. If not, pad it.
if frame%16 != 0 {
frame += 16 - (frame % 16)
}
return frame
}
Expand Down
6 changes: 4 additions & 2 deletions src/cmd/compile/internal/gc/pgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,8 @@ func createSimpleVars(automDecls []*Node) ([]*Node, []*dwarf.Var, map[*Node]bool
if Ctxt.FixedFrameSize() == 0 {
offs -= int64(Widthptr)
}
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
// There is a word space for FP on ARM64 even if the frame pointer is disabled
offs -= int64(Widthptr)
}

Expand Down Expand Up @@ -607,7 +608,8 @@ func stackOffset(slot ssa.LocalSlot) int32 {
if Ctxt.FixedFrameSize() == 0 {
base -= int64(Widthptr)
}
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
// There is a word space for FP on ARM64 even if the frame pointer is disabled
base -= int64(Widthptr)
}
case PPARAM, PPARAMOUT:
Expand Down
10 changes: 7 additions & 3 deletions src/cmd/internal/obj/arm64/asm7.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type ctxt7 struct {
blitrl *obj.Prog
elitrl *obj.Prog
autosize int32
extrasize int32
instoffset int64
pc int64
pool struct {
Expand Down Expand Up @@ -777,7 +778,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
}

c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset&0xffffffff) + 8}
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
p.To.Offset &= 0xffffffff // extrasize is no longer needed

bflag := 1
pc := int64(0)
Expand Down Expand Up @@ -1436,7 +1438,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
c.instoffset = int64(c.autosize) + a.Offset
// The frame top 8 or 16 bytes are for FP
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
return autoclass(c.instoffset)

case obj.NAME_PARAM:
Expand Down Expand Up @@ -1536,7 +1539,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
c.instoffset = int64(c.autosize) + a.Offset
// The frame top 8 or 16 bytes are for FP
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)

case obj.NAME_PARAM:
if a.Reg == REGSP {
Expand Down
183 changes: 166 additions & 17 deletions src/cmd/internal/obj/arm64/obj7.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,32 +542,35 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
c.autosize += 8
}

if c.autosize != 0 && c.autosize&(16-1) != 0 {
// The frame includes an LR.
// If the frame size is 8, it's only an LR,
// so there's no potential for breaking references to
// local variables by growing the frame size,
// because there are no local variables.
// But otherwise, if there is a non-empty locals section,
// the author of the code is responsible for making sure
// that the frame size is 8 mod 16.
if c.autosize == 8 {
c.autosize += 8
c.cursym.Func.Locals += 8
if c.autosize != 0 {
extrasize := int32(0)
if c.autosize%16 == 8 {
// Allocate extra 8 bytes on the frame top to save FP
extrasize = 8
} else if c.autosize&(16-1) == 0 {
// Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
extrasize = 16
} else {
c.ctxt.Diag("%v: unaligned frame size %d - must be 8 mod 16 (or 0)", p, c.autosize-8)
c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
}
c.autosize += extrasize
c.cursym.Func.Locals += extrasize

// low 32 bits for autosize
// high 32 bits for extrasize
p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
} else {
// NOFRAME
p.To.Offset = 0
}

if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 {
if c.ctxt.Debugvlog {
c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name)
}
c.cursym.Func.Text.Mark |= LEAF
}

// FP offsets need an updated p.To.Offset.
p.To.Offset = int64(c.autosize) - 8

if cursym.Func.Text.Mark&LEAF != 0 {
cursym.Set(obj.AttrLeaf, true)
if p.From.Sym.NoFrame() {
Expand Down Expand Up @@ -631,6 +634,26 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
q1.Spadj = aoffset
}

if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = AMOVD
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGFP
q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REGSP
q1.To.Offset = -8

q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
q1.From.Type = obj.TYPE_CONST
q1.From.Offset = 8
q1.Reg = REGSP
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGFP
}

if c.cursym.Func.Text.From.Sym.Wrapper() {
// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
//
Expand Down Expand Up @@ -753,9 +776,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.To.Type = obj.TYPE_REG
p.To.Reg = REGSP
p.Spadj = -c.autosize

if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
p = obj.Appendp(p, c.newprog)
p.As = ASUB
p.From.Type = obj.TYPE_CONST
p.From.Offset = 8
p.Reg = REGSP
p.To.Type = obj.TYPE_REG
p.To.Reg = REGFP
}
}
} else {
/* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/

if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
p.From.Offset = -8
p.To.Type = obj.TYPE_REG
p.To.Reg = REGFP
p = obj.Appendp(p, c.newprog)
}

aoffset := c.autosize

if aoffset > 0xF0 {
Expand Down Expand Up @@ -814,7 +858,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.Spadj = int32(+p.From.Offset)
}
}
break

case obj.AGETCALLERPC:
if cursym.Leaf() {
Expand All @@ -828,6 +871,112 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
}

case obj.ADUFFCOPY:
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
// ADR ret_addr, R27
// STP (FP, R27), -24(SP)
// SUB 24, SP, FP
// DUFFCOPY
// ret_addr:
// SUB 8, SP, FP

q1 := p
// copy DUFFCOPY from q1 to q4
q4 := obj.Appendp(p, c.newprog)
q4.Pos = p.Pos
q4.As = obj.ADUFFCOPY
q4.To = p.To

q1.As = AADR
q1.From.Type = obj.TYPE_BRANCH
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REG_R27

q2 := obj.Appendp(q1, c.newprog)
q2.Pos = p.Pos
q2.As = ASTP
q2.From.Type = obj.TYPE_REGREG
q2.From.Reg = REGFP
q2.From.Offset = int64(REG_R27)
q2.To.Type = obj.TYPE_MEM
q2.To.Reg = REGSP
q2.To.Offset = -24

// maintaine FP for DUFFCOPY
q3 := obj.Appendp(q2, c.newprog)
q3.Pos = p.Pos
q3.As = ASUB
q3.From.Type = obj.TYPE_CONST
q3.From.Offset = 24
q3.Reg = REGSP
q3.To.Type = obj.TYPE_REG
q3.To.Reg = REGFP

q5 := obj.Appendp(q4, c.newprog)
q5.Pos = p.Pos
q5.As = ASUB
q5.From.Type = obj.TYPE_CONST
q5.From.Offset = 8
q5.Reg = REGSP
q5.To.Type = obj.TYPE_REG
q5.To.Reg = REGFP
q1.Pcond = q5
p = q5
}

case obj.ADUFFZERO:
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
// ADR ret_addr, R27
// STP (FP, R27), -24(SP)
// SUB 24, SP, FP
// DUFFZERO
// ret_addr:
// SUB 8, SP, FP

q1 := p
// copy DUFFZERO from q1 to q4
q4 := obj.Appendp(p, c.newprog)
q4.Pos = p.Pos
q4.As = obj.ADUFFZERO
q4.To = p.To

q1.As = AADR
q1.From.Type = obj.TYPE_BRANCH
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REG_R27

q2 := obj.Appendp(q1, c.newprog)
q2.Pos = p.Pos
q2.As = ASTP
q2.From.Type = obj.TYPE_REGREG
q2.From.Reg = REGFP
q2.From.Offset = int64(REG_R27)
q2.To.Type = obj.TYPE_MEM
q2.To.Reg = REGSP
q2.To.Offset = -24

// maintaine FP for DUFFZERO
q3 := obj.Appendp(q2, c.newprog)
q3.Pos = p.Pos
q3.As = ASUB
q3.From.Type = obj.TYPE_CONST
q3.From.Offset = 24
q3.Reg = REGSP
q3.To.Type = obj.TYPE_REG
q3.To.Reg = REGFP

q5 := obj.Appendp(q4, c.newprog)
q5.Pos = p.Pos
q5.As = ASUB
q5.From.Type = obj.TYPE_CONST
q5.From.Offset = 8
q5.Reg = REGSP
q5.To.Type = obj.TYPE_REG
q5.To.Reg = REGFP
q1.Pcond = q5
p = q5
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/internal/objabi/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func init() {
}

func Framepointer_enabled(goos, goarch string) bool {
return framepointer_enabled != 0 && goarch == "amd64" && goos != "nacl"
return framepointer_enabled != 0 && (goarch == "amd64" && goos != "nacl" || goarch == "arm64" && goos == "linux")
}

func addexp(s string) {
Expand Down
4 changes: 4 additions & 0 deletions src/cmd/link/internal/ld/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -1815,6 +1815,10 @@ func (ctxt *Link) dostkcheck() {
ch.up = nil

ch.limit = objabi.StackLimit - callsize(ctxt)
if objabi.GOARCH == "arm64" {
// need extra 8 bytes below SP to save FP
ch.limit -= 8
}

// Check every function, but do the nosplit functions in a first pass,
// to make the printed failure chains as short as possible.
Expand Down
Loading

0 comments on commit 8f4fd3f

Please sign in to comment.