Skip to content

Commit e30704c

Browse files
committed
Auto merge of #176 - alexcrichton:probestack2, r=alexcrichton
Tweak definition of probestack functions. It looks like the old `__rust_probestack` routine is incompatible with newer Linux kernels. My best guess for this is that the kernel's auto-growth logic is failing to trigger, causing what looks like a legitimate segfault to get delivered. My best guess for why *that's* happening is that the faulting address is below `%rsp`, whereas previously all faulting stack addresses were above `%rsp`. The probestack routine does not modify `%rsp` as it's probing the stack, and presumably newer kernels are interpreting this as a legitimate violation. This commit tweaks the probestack routine to instead update `%rsp` incrementally as probing happens. The ABI of the function, however, requires that `%rsp` isn't changed as part of the function, so it's restored at the end to the previous value.
2 parents e9b258b + 2061072 commit e30704c

File tree

2 files changed

+30
-26
lines changed

2 files changed

+30
-26
lines changed

src/probestack.rs

+27-23
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
#![cfg(not(windows))] // Windows already has builtins to do this
4545

4646
#[naked]
47-
#[no_mangle]
47+
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
4848
#[cfg(target_arch = "x86_64")]
4949
pub unsafe extern fn __rust_probestack() {
5050
// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
@@ -53,36 +53,41 @@ pub unsafe extern fn __rust_probestack() {
5353
// The ABI here is that the stack frame size is located in `%eax`. Upon
5454
// return we're not supposed to modify `%esp` or `%eax`.
5555
asm!("
56-
lea 8(%rsp),%r11 // rsp before calling this routine -> r11
56+
mov %rax,%r11 // duplicate %rax as we're clobbering %r11
5757
58-
// Main loop, taken in one page increments. We're decrementing r11 by
58+
// Main loop, taken in one page increments. We're decrementing rsp by
5959
// a page each time until there's less than a page remaining. We're
6060
// guaranteed that this function isn't called unless there's more than a
61-
// page needed
61+
// page needed.
62+
//
63+
// Note that we're also testing against `8(%rsp)` to account for the 8
64+
// bytes pushed on the stack originally with our return address. Using
65+
// `8(%rsp)` simulates us testing the stack pointer in the caller's
66+
// context.
6267
2:
68+
sub $$0x1000,%rsp
69+
test %rsp,8(%rsp)
6370
sub $$0x1000,%r11
64-
test %r11,(%r11)
65-
sub $$0x1000,%rax
66-
cmp $$0x1000,%rax
71+
cmp $$0x1000,%r11
6772
ja 2b
6873
6974
// Finish up the last remaining stack space requested, getting the last
70-
// bits out of rax
71-
sub %rax,%r11
72-
test %r11,(%r11)
75+
// bits out of r11
76+
sub %r11,%rsp
77+
test %rsp,8(%rsp)
7378
74-
// We now know that %r11 is (%rsp + 8 - %rax) so to recover rax
75-
// we calculate (%rsp + 8) - %r11 which will give us %rax
76-
lea 8(%rsp),%rax
77-
sub %r11,%rax
79+
// Restore the stack pointer to what it previously was when entering
80+
// this function. The caller will readjust the stack pointer after we
81+
// return.
82+
add %rax,%rsp
7883
7984
ret
8085
");
8186
::core::intrinsics::unreachable();
8287
}
8388

8489
#[naked]
85-
#[no_mangle]
90+
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
8691
#[cfg(target_arch = "x86")]
8792
pub unsafe extern fn __rust_probestack() {
8893
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
@@ -92,19 +97,18 @@ pub unsafe extern fn __rust_probestack() {
9297
// The ABI here is the same as x86_64, except everything is 32-bits large.
9398
asm!("
9499
push %ecx
95-
lea 8(%esp),%ecx
100+
mov %eax,%ecx
96101
2:
102+
sub $$0x1000,%esp
103+
test %esp,8(%esp)
97104
sub $$0x1000,%ecx
98-
test %ecx,(%ecx)
99-
sub $$0x1000,%eax
100-
cmp $$0x1000,%eax
105+
cmp $$0x1000,%ecx
101106
ja 2b
102107
103-
sub %eax,%ecx
104-
test %ecx,(%ecx)
108+
sub %ecx,%esp
109+
test %esp,8(%esp)
105110
106-
lea 8(%esp),%eax
107-
sub %ecx,%eax
111+
add %eax,%esp
108112
pop %ecx
109113
ret
110114
");

src/x86_64.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use core::intrinsics;
1010

1111
#[cfg(windows)]
1212
#[naked]
13-
#[no_mangle]
13+
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
1414
pub unsafe fn ___chkstk_ms() {
1515
asm!("push %rcx
1616
push %rax
@@ -34,7 +34,7 @@ pub unsafe fn ___chkstk_ms() {
3434

3535
#[cfg(windows)]
3636
#[naked]
37-
#[no_mangle]
37+
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
3838
pub unsafe fn __alloca() {
3939
asm!("mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx
4040
jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable");
@@ -43,7 +43,7 @@ pub unsafe fn __alloca() {
4343

4444
#[cfg(windows)]
4545
#[naked]
46-
#[no_mangle]
46+
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
4747
pub unsafe fn ___chkstk() {
4848
asm!("push %rcx
4949
cmp $$0x1000,%rax

0 commit comments

Comments
 (0)