diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go index f7b3851398f51c..204391fef1c778 100644 --- a/src/cmd/compile/internal/arm64/ggen.go +++ b/src/cmd/compile/internal/arm64/ggen.go @@ -14,10 +14,10 @@ import ( var darwin = objabi.GOOS == "darwin" func padframe(frame int64) int64 { - // arm64 requires that the frame size (not counting saved LR) - // be empty or be 8 mod 16. If not, pad it. - if frame != 0 && frame%16 != 8 { - frame += 8 + // arm64 requires that the frame size (not counting saved FP&LR) + // be 16 bytes aligned. If not, pad it. + if frame%16 != 0 { + frame += 16 - (frame % 16) } return frame } diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go index 7f20643ab57feb..563eb9e966388b 100644 --- a/src/cmd/compile/internal/gc/pgen.go +++ b/src/cmd/compile/internal/gc/pgen.go @@ -427,7 +427,8 @@ func createSimpleVars(automDecls []*Node) ([]*Node, []*dwarf.Var, map[*Node]bool if Ctxt.FixedFrameSize() == 0 { offs -= int64(Widthptr) } - if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) { + if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" { + // There is a word space for FP on ARM64 even if the frame pointer is disabled offs -= int64(Widthptr) } @@ -607,7 +608,8 @@ func stackOffset(slot ssa.LocalSlot) int32 { if Ctxt.FixedFrameSize() == 0 { base -= int64(Widthptr) } - if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) { + if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" { + // There is a word space for FP on ARM64 even if the frame pointer is disabled base -= int64(Widthptr) } case PPARAM, PPARAMOUT: diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index ad4f1725446712..2abb8c2c773d93 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -49,6 +49,7 @@ type ctxt7 struct { blitrl *obj.Prog elitrl *obj.Prog autosize int32 + extrasize int32 instoffset int64 pc int64 pool struct { @@ -777,7 +778,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("arm64 ops not initialized, call arm64.buildop first") } - c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset&0xffffffff) + 8} + c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)} + p.To.Offset &= 0xffffffff // extrasize is no longer needed bflag := 1 pc := int64(0) @@ -1436,7 +1438,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. a.Reg = obj.REG_NONE } - c.instoffset = int64(c.autosize) + a.Offset + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) return autoclass(c.instoffset) case obj.NAME_PARAM: @@ -1536,7 +1539,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. a.Reg = obj.REG_NONE } - c.instoffset = int64(c.autosize) + a.Offset + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) case obj.NAME_PARAM: if a.Reg == REGSP { diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index 0d832387d7f7dd..97b8f70c9b20e3 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -542,22 +542,28 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.autosize += 8 } - if c.autosize != 0 && c.autosize&(16-1) != 0 { - // The frame includes an LR. - // If the frame size is 8, it's only an LR, - // so there's no potential for breaking references to - // local variables by growing the frame size, - // because there are no local variables. - // But otherwise, if there is a non-empty locals section, - // the author of the code is responsible for making sure - // that the frame size is 8 mod 16. - if c.autosize == 8 { - c.autosize += 8 - c.cursym.Func.Locals += 8 + if c.autosize != 0 { + extrasize := int32(0) + if c.autosize%16 == 8 { + // Allocate extra 8 bytes on the frame top to save FP + extrasize = 8 + } else if c.autosize&(16-1) == 0 { + // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16 + extrasize = 16 } else { - c.ctxt.Diag("%v: unaligned frame size %d - must be 8 mod 16 (or 0)", p, c.autosize-8) + c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) } + c.autosize += extrasize + c.cursym.Func.Locals += extrasize + + // low 32 bits for autosize + // high 32 bits for extrasize + p.To.Offset = int64(c.autosize) | int64(extrasize)<<32 + } else { + // NOFRAME + p.To.Offset = 0 } + if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { if c.ctxt.Debugvlog { c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name) @@ -565,9 +571,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.cursym.Func.Text.Mark |= LEAF } - // FP offsets need an updated p.To.Offset. - p.To.Offset = int64(c.autosize) - 8 - if cursym.Func.Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { @@ -631,6 +634,26 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q1.Spadj = aoffset } + if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) { + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGFP + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REGSP + q1.To.Offset = -8 + + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = 8 + q1.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGFP + } + if c.cursym.Func.Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // @@ -753,9 +776,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Type = obj.TYPE_REG p.To.Reg = REGSP p.Spadj = -c.autosize + + if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) { + p = obj.Appendp(p, c.newprog) + p.As = ASUB + p.From.Type = obj.TYPE_CONST + p.From.Offset = 8 + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP + } } } else { /* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/ + + if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) { + p.As = AMOVD + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGSP + p.From.Offset = -8 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP + p = obj.Appendp(p, c.newprog) + } + aoffset := c.autosize if aoffset > 0xF0 { @@ -814,7 +858,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.Spadj = int32(+p.From.Offset) } } - break case obj.AGETCALLERPC: if cursym.Leaf() { @@ -828,6 +871,112 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP } + + case obj.ADUFFCOPY: + if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) { + // ADR ret_addr, R27 + // STP (FP, R27), -24(SP) + // SUB 24, SP, FP + // DUFFCOPY + // ret_addr: + // SUB 8, SP, FP + + q1 := p + // copy DUFFCOPY from q1 to q4 + q4 := obj.Appendp(p, c.newprog) + q4.Pos = p.Pos + q4.As = obj.ADUFFCOPY + q4.To = p.To + + q1.As = AADR + q1.From.Type = obj.TYPE_BRANCH + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_R27 + + q2 := obj.Appendp(q1, c.newprog) + q2.Pos = p.Pos + q2.As = ASTP + q2.From.Type = obj.TYPE_REGREG + q2.From.Reg = REGFP + q2.From.Offset = int64(REG_R27) + q2.To.Type = obj.TYPE_MEM + q2.To.Reg = REGSP + q2.To.Offset = -24 + + // maintaine FP for DUFFCOPY + q3 := obj.Appendp(q2, c.newprog) + q3.Pos = p.Pos + q3.As = ASUB + q3.From.Type = obj.TYPE_CONST + q3.From.Offset = 24 + q3.Reg = REGSP + q3.To.Type = obj.TYPE_REG + q3.To.Reg = REGFP + + q5 := obj.Appendp(q4, c.newprog) + q5.Pos = p.Pos + q5.As = ASUB + q5.From.Type = obj.TYPE_CONST + q5.From.Offset = 8 + q5.Reg = REGSP + q5.To.Type = obj.TYPE_REG + q5.To.Reg = REGFP + q1.Pcond = q5 + p = q5 + } + + case obj.ADUFFZERO: + if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) { + // ADR ret_addr, R27 + // STP (FP, R27), -24(SP) + // SUB 24, SP, FP + // DUFFZERO + // ret_addr: + // SUB 8, SP, FP + + q1 := p + // copy DUFFZERO from q1 to q4 + q4 := obj.Appendp(p, c.newprog) + q4.Pos = p.Pos + q4.As = obj.ADUFFZERO + q4.To = p.To + + q1.As = AADR + q1.From.Type = obj.TYPE_BRANCH + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_R27 + + q2 := obj.Appendp(q1, c.newprog) + q2.Pos = p.Pos + q2.As = ASTP + q2.From.Type = obj.TYPE_REGREG + q2.From.Reg = REGFP + q2.From.Offset = int64(REG_R27) + q2.To.Type = obj.TYPE_MEM + q2.To.Reg = REGSP + q2.To.Offset = -24 + + // maintaine FP for DUFFZERO + q3 := obj.Appendp(q2, c.newprog) + q3.Pos = p.Pos + q3.As = ASUB + q3.From.Type = obj.TYPE_CONST + q3.From.Offset = 24 + q3.Reg = REGSP + q3.To.Type = obj.TYPE_REG + q3.To.Reg = REGFP + + q5 := obj.Appendp(q4, c.newprog) + q5.Pos = p.Pos + q5.As = ASUB + q5.From.Type = obj.TYPE_CONST + q5.From.Offset = 8 + q5.Reg = REGSP + q5.To.Type = obj.TYPE_REG + q5.To.Reg = REGFP + q1.Pcond = q5 + p = q5 + } } } } diff --git a/src/cmd/internal/objabi/util.go b/src/cmd/internal/objabi/util.go index a47e2f93a10e9f..ffd1c04d39fcd0 100644 --- a/src/cmd/internal/objabi/util.go +++ b/src/cmd/internal/objabi/util.go @@ -76,7 +76,7 @@ func init() { } func Framepointer_enabled(goos, goarch string) bool { - return framepointer_enabled != 0 && goarch == "amd64" && goos != "nacl" + return framepointer_enabled != 0 && (goarch == "amd64" && goos != "nacl" || goarch == "arm64" && goos == "linux") } func addexp(s string) { diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go index 1b6d5d17047129..ba03cb707b84f7 100644 --- a/src/cmd/link/internal/ld/lib.go +++ b/src/cmd/link/internal/ld/lib.go @@ -1815,6 +1815,10 @@ func (ctxt *Link) dostkcheck() { ch.up = nil ch.limit = objabi.StackLimit - callsize(ctxt) + if objabi.GOARCH == "arm64" { + // need extra 8 bytes below SP to save FP + ch.limit -= 8 + } // Check every function, but do the nosplit functions in a first pass, // to make the printed failure chains as short as possible. diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index af389be9fee336..6a6a699241dc58 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -39,10 +39,9 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0 #endif MOVD $setg_gcc<>(SB), R1 // arg 1: setg MOVD g, R0 // arg 0: G + SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. BL (R12) - MOVD _cgo_init(SB), R12 - CMP $0, R12 - BEQ nocgo + ADD $16, RSP nocgo: // update stackguard after _cgo_init @@ -107,6 +106,7 @@ TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8 MOVD buf+0(FP), R3 MOVD RSP, R0 MOVD R0, gobuf_sp(R3) + MOVD R29, gobuf_bp(R3) MOVD LR, gobuf_pc(R3) MOVD g, gobuf_g(R3) MOVD ZR, gobuf_lr(R3) @@ -128,10 +128,12 @@ TEXT runtime·gogo(SB), NOSPLIT, $24-8 MOVD 0(g), R4 // make sure g is not nil MOVD gobuf_sp(R5), R0 MOVD R0, RSP + MOVD gobuf_bp(R5), R29 MOVD gobuf_lr(R5), LR MOVD gobuf_ret(R5), R0 MOVD gobuf_ctxt(R5), R26 MOVD $0, gobuf_sp(R5) + MOVD $0, gobuf_bp(R5) MOVD $0, gobuf_ret(R5) MOVD $0, gobuf_lr(R5) MOVD $0, gobuf_ctxt(R5) @@ -147,6 +149,7 @@ TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8 // Save caller state in g->sched MOVD RSP, R0 MOVD R0, (g_sched+gobuf_sp)(g) + MOVD R29, (g_sched+gobuf_bp)(g) MOVD LR, (g_sched+gobuf_pc)(g) MOVD $0, (g_sched+gobuf_lr)(g) MOVD g, (g_sched+gobuf_g)(g) @@ -163,6 +166,7 @@ TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8 MOVD 0(R26), R4 // code pointer MOVD (g_sched+gobuf_sp)(g), R0 MOVD R0, RSP // sp = m->g0->sched.sp + MOVD (g_sched+gobuf_bp)(g), R29 MOVD R3, -8(RSP) MOVD $0, -16(RSP) SUB $16, RSP @@ -211,6 +215,7 @@ switch: MOVD R6, (g_sched+gobuf_pc)(g) MOVD RSP, R0 MOVD R0, (g_sched+gobuf_sp)(g) + MOVD R29, (g_sched+gobuf_bp)(g) MOVD $0, (g_sched+gobuf_lr)(g) MOVD g, (g_sched+gobuf_g)(g) @@ -224,6 +229,7 @@ switch: MOVD $runtime·mstart(SB), R4 MOVD R4, 0(R3) MOVD R3, RSP + MOVD (g_sched+gobuf_bp)(g), R29 // call target function MOVD 0(R26), R3 // code pointer @@ -235,7 +241,9 @@ switch: BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R0 MOVD R0, RSP + MOVD (g_sched+gobuf_bp)(g), R29 MOVD $0, (g_sched+gobuf_sp)(g) + MOVD $0, (g_sched+gobuf_bp)(g) RET noswitch: @@ -244,6 +252,7 @@ noswitch: // at an intermediate systemstack. MOVD 0(R26), R3 // code pointer MOVD.P 16(RSP), R30 // restore LR + SUB $8, RSP, R29 // restore FP B (R3) /* @@ -278,6 +287,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 // Set g->sched to context in f MOVD RSP, R0 MOVD R0, (g_sched+gobuf_sp)(g) + MOVD R29, (g_sched+gobuf_bp)(g) MOVD LR, (g_sched+gobuf_pc)(g) MOVD R3, (g_sched+gobuf_lr)(g) MOVD R26, (g_sched+gobuf_ctxt)(g) @@ -294,6 +304,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R0 MOVD R0, RSP + MOVD (g_sched+gobuf_bp)(g), R29 MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned) BL runtime·newstack(SB) @@ -843,8 +854,9 @@ TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16 // Save state of caller into g->sched. Smashes R0. TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0 MOVD LR, (g_sched+gobuf_pc)(g) - MOVD RSP, R0 + MOVD RSP, R0 MOVD R0, (g_sched+gobuf_sp)(g) + MOVD R29, (g_sched+gobuf_bp)(g) MOVD $0, (g_sched+gobuf_lr)(g) MOVD $0, (g_sched+gobuf_ret)(g) // Assert ctxt is zero. See func save. @@ -885,6 +897,7 @@ TEXT ·asmcgocall(SB),NOSPLIT,$0-20 BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R0 MOVD R0, RSP + MOVD (g_sched+gobuf_bp)(g), R29 MOVD R9, R0 // Now on a scheduling stack (a pthread-created stack). @@ -996,6 +1009,7 @@ needm: MOVD m_g0(R8), R3 MOVD RSP, R0 MOVD R0, (g_sched+gobuf_sp)(R3) + MOVD R29, (g_sched+gobuf_bp)(R3) havem: // Now there's a valid m, and we're running on its m->g0. @@ -1003,7 +1017,7 @@ havem: // Save current sp in m->g0->sched.sp in preparation for // switch back to m->curg stack. // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP). - // Beware that the frame size is actually 32. + // Beware that the frame size is actually 32+16. MOVD m_g0(R8), R3 MOVD (g_sched+gobuf_sp)(R3), R4 MOVD R4, savedsp-16(SP) @@ -1030,10 +1044,12 @@ havem: BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 MOVD (g_sched+gobuf_pc)(g), R5 - MOVD R5, -(24+8)(R4) + MOVD R5, -48(R4) + MOVD (g_sched+gobuf_bp)(g), R5 + MOVD R5, -56(R4) MOVD ctxt+24(FP), R0 - MOVD R0, -(16+8)(R4) - MOVD $-(24+8)(R4), R0 // maintain 16-byte SP alignment + MOVD R0, -40(R4) + MOVD $-48(R4), R0 // maintain 16-byte SP alignment MOVD R0, RSP BL runtime·cgocallbackg(SB) @@ -1041,7 +1057,7 @@ havem: MOVD 0(RSP), R5 MOVD R5, (g_sched+gobuf_pc)(g) MOVD RSP, R4 - ADD $(24+8), R4, R4 + ADD $48, R4, R4 MOVD R4, (g_sched+gobuf_sp)(g) // Switch back to m->g0's stack and restore m->g0->sched.sp. diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index c85033f4bccff6..86bd2fb01c0966 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -268,7 +268,8 @@ func cgocallbackg1(ctxt uintptr) { case "arm64": // On arm64, stack frame is four words and there's a saved LR between // SP and the stack frame and between the stack frame and the arguments. - cb = (*args)(unsafe.Pointer(sp + 5*sys.PtrSize)) + // Additional two words (16-byte alignment) are for saving FP. + cb = (*args)(unsafe.Pointer(sp + 7*sys.PtrSize)) case "amd64": // On amd64, stack frame is two words, plus caller PC. if framepointer_enabled { diff --git a/src/runtime/rt0_darwin_arm64.s b/src/runtime/rt0_darwin_arm64.s index d039a8e0abab27..e3972f492429af 100644 --- a/src/runtime/rt0_darwin_arm64.s +++ b/src/runtime/rt0_darwin_arm64.s @@ -49,7 +49,9 @@ TEXT _rt0_arm64_darwin_lib(SB),NOSPLIT,$168 MOVD _cgo_sys_thread_create(SB), R4 MOVD $_rt0_arm64_darwin_lib_go(SB), R0 MOVD $0, R1 + SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. BL (R4) + ADD $16, RSP // Restore callee-save registers. MOVD 24(RSP), R19 diff --git a/src/runtime/rt0_linux_arm64.s b/src/runtime/rt0_linux_arm64.s index 458f082159dae9..a6bc99df569309 100644 --- a/src/runtime/rt0_linux_arm64.s +++ b/src/runtime/rt0_linux_arm64.s @@ -48,7 +48,9 @@ TEXT _rt0_arm64_linux_lib(SB),NOSPLIT,$184 BEQ nocgo MOVD $_rt0_arm64_linux_lib_go(SB), R0 MOVD $0, R1 + SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. BL (R4) + ADD $16, RSP B restore nocgo: diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s index c6afd76a657e27..1c8fce3db649b0 100644 --- a/src/runtime/sys_linux_arm64.s +++ b/src/runtime/sys_linux_arm64.s @@ -239,7 +239,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$24-8 MOVD (g_sched+gobuf_sp)(R3), R1 // Set RSP to g0 stack noswitch: - SUB $16, R1 + SUB $32, R1 BIC $15, R1 MOVD R1, RSP @@ -298,7 +298,9 @@ TEXT runtime·callCgoSigaction(SB),NOSPLIT,$0 MOVD new+8(FP), R1 MOVD old+16(FP), R2 MOVD _cgo_sigaction(SB), R3 + SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. BL R3 + ADD $16, RSP MOVW R0, ret+24(FP) RET @@ -361,7 +363,9 @@ TEXT runtime·callCgoMmap(SB),NOSPLIT,$0 MOVW fd+24(FP), R4 MOVW off+28(FP), R5 MOVD _cgo_mmap(SB), R9 + SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. BL R9 + ADD $16, RSP MOVD R0, ret+32(FP) RET @@ -382,7 +386,9 @@ TEXT runtime·callCgoMunmap(SB),NOSPLIT,$0 MOVD addr+0(FP), R0 MOVD n+8(FP), R1 MOVD _cgo_munmap(SB), R9 + SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. BL R9 + ADD $16, RSP RET TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0 diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 8370fd75937edd..4c2010493a76a8 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -285,7 +285,7 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in // If framepointer_enabled and there's a frame, then // there's a saved bp here. - if framepointer_enabled && GOARCH == "amd64" && frame.varp > frame.sp { + if frame.varp > frame.sp && (framepointer_enabled && GOARCH == "amd64" || GOARCH == "arm64") { frame.varp -= sys.RegSize } diff --git a/test/codegen/stack.go b/test/codegen/stack.go index 7e12dbc0eb6bbe..0f2f6178c7f4cb 100644 --- a/test/codegen/stack.go +++ b/test/codegen/stack.go @@ -16,7 +16,7 @@ import "runtime" // 386:"TEXT\t.*, [$]0-" // amd64:"TEXT\t.*, [$]0-" // arm:"TEXT\t.*, [$]-4-" -// arm64:"TEXT\t.*, [$]-8-" +// arm64:"TEXT\t.*, [$]0-" // mips:"TEXT\t.*, [$]-4-" // ppc64le:"TEXT\t.*, [$]0-" // s390x:"TEXT\t.*, [$]0-" @@ -35,7 +35,7 @@ type T struct { // 386:"TEXT\t.*, [$]0-" // amd64:"TEXT\t.*, [$]0-" // arm:"TEXT\t.*, [$]0-" (spills return address) -// arm64:"TEXT\t.*, [$]-8-" +// arm64:"TEXT\t.*, [$]0-" // mips:"TEXT\t.*, [$]-4-" // ppc64le:"TEXT\t.*, [$]0-" // s390x:"TEXT\t.*, [$]0-" @@ -50,7 +50,7 @@ func ZeroLargeStruct(x *T) { // - 386 fails due to spilling a register // amd64:"TEXT\t.*, [$]0-" // arm:"TEXT\t.*, [$]0-" (spills return address) -// arm64:"TEXT\t.*, [$]-8-" +// arm64:"TEXT\t.*, [$]0-" // ppc64le:"TEXT\t.*, [$]0-" // s390x:"TEXT\t.*, [$]0-" // Note: that 386 currently has to spill a register. @@ -64,7 +64,7 @@ func KeepWanted(t *T) { // - 386 fails due to spilling a register // - arm & mips fail due to softfloat calls // amd64:"TEXT\t.*, [$]0-" -// arm64:"TEXT\t.*, [$]-8-" +// arm64:"TEXT\t.*, [$]0-" // ppc64le:"TEXT\t.*, [$]0-" // s390x:"TEXT\t.*, [$]0-" func ArrayAdd64(a, b [4]float64) [4]float64 { @@ -76,7 +76,7 @@ func ArrayAdd64(a, b [4]float64) [4]float64 { // 386:"TEXT\t.*, [$]0-" // amd64:"TEXT\t.*, [$]0-" // arm:"TEXT\t.*, [$]0-" (spills return address) -// arm64:"TEXT\t.*, [$]-8-" +// arm64:"TEXT\t.*, [$]0-" // mips:"TEXT\t.*, [$]-4-" // ppc64le:"TEXT\t.*, [$]0-" // s390x:"TEXT\t.*, [$]0-" diff --git a/test/nosplit.go b/test/nosplit.go index e6cd04e563060d..8b61c9e96d1429 100644 --- a/test/nosplit.go +++ b/test/nosplit.go @@ -118,11 +118,11 @@ main 136 # (CallSize is 32 on ppc64, 8 on amd64 for frame pointer.) main 96 nosplit main 100 nosplit; REJECT ppc64 ppc64le -main 104 nosplit; REJECT ppc64 ppc64le +main 104 nosplit; REJECT ppc64 ppc64le arm64 main 108 nosplit; REJECT ppc64 ppc64le -main 112 nosplit; REJECT ppc64 ppc64le +main 112 nosplit; REJECT ppc64 ppc64le arm64 main 116 nosplit; REJECT ppc64 ppc64le -main 120 nosplit; REJECT ppc64 ppc64le amd64 +main 120 nosplit; REJECT ppc64 ppc64le amd64 arm64 main 124 nosplit; REJECT ppc64 ppc64le amd64 main 128 nosplit; REJECT main 132 nosplit; REJECT @@ -136,11 +136,11 @@ main 136 nosplit; REJECT # Because AMD64 uses frame pointer, it has 8 fewer bytes. main 96 nosplit call f; f 0 nosplit main 100 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le -main 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le +main 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le arm64 main 108 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le -main 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 +main 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64 main 116 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 -main 120 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 +main 124 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64 main 124 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 386 main 128 nosplit call f; f 0 nosplit; REJECT main 132 nosplit call f; f 0 nosplit; REJECT @@ -152,11 +152,11 @@ main 136 nosplit call f; f 0 nosplit; REJECT # Architectures differ in the same way as before. main 96 nosplit call f; f 0 call f main 100 nosplit call f; f 0 call f; REJECT ppc64 ppc64le -main 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 +main 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64 main 108 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 -main 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 +main 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64 main 116 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 -main 120 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386 +main 120 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386 arm64 main 124 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386 main 128 nosplit call f; f 0 call f; REJECT main 132 nosplit call f; f 0 call f; REJECT @@ -165,11 +165,11 @@ main 136 nosplit call f; f 0 call f; REJECT # Indirect calls are assumed to be splitting functions. main 96 nosplit callind main 100 nosplit callind; REJECT ppc64 ppc64le -main 104 nosplit callind; REJECT ppc64 ppc64le amd64 +main 104 nosplit callind; REJECT ppc64 ppc64le amd64 arm64 main 108 nosplit callind; REJECT ppc64 ppc64le amd64 -main 112 nosplit callind; REJECT ppc64 ppc64le amd64 +main 112 nosplit callind; REJECT ppc64 ppc64le amd64 arm64 main 116 nosplit callind; REJECT ppc64 ppc64le amd64 -main 120 nosplit callind; REJECT ppc64 ppc64le amd64 386 +main 120 nosplit callind; REJECT ppc64 ppc64le amd64 386 arm64 main 124 nosplit callind; REJECT ppc64 ppc64le amd64 386 main 128 nosplit callind; REJECT main 132 nosplit callind; REJECT @@ -319,7 +319,7 @@ TestCases: } } - if size%ptrSize == 4 || goarch == "arm64" && size != 0 && (size+8)%16 != 0 { + if size%ptrSize == 4 { continue TestCases } nosplit := m[3]