ソースを参照

fix: symbol mismatch in object file (#4171)

* fix: symbol kind in obj

* actually patch TLS

* update docs and cleanup size impl for tls

* remove debug logs

---------

Co-authored-by: Jonathan Kelley <jkelleyrtp@gmail.com>
Daniel Gallups 1 ヶ月 前
コミット
8216e4f177
2 ファイル変更108 行追加31 行削除
  1. 75 27
      packages/cli/src/build/patch.rs
  2. 33 4
      packages/subsecond/subsecond/src/lib.rs

+ 75 - 27
packages/cli/src/build/patch.rs

@@ -15,7 +15,7 @@ use std::{
     sync::{Arc, RwLock},
 };
 use subsecond_types::{AddressMap, JumpTable};
-use target_lexicon::{Architecture, OperatingSystem, Triple};
+use target_lexicon::{Architecture, OperatingSystem, PointerWidth, Triple};
 use thiserror::Error;
 use walrus::{
     ConstExpr, DataKind, ElementItems, ElementKind, FunctionBuilder, FunctionId, FunctionKind,
@@ -83,6 +83,7 @@ pub struct CachedSymbol {
     pub kind: SymbolKind,
     pub is_undefined: bool,
     pub is_weak: bool,
+    pub size: u64,
 }
 
 impl PartialEq for HotpatchModuleCache {
@@ -137,6 +138,7 @@ impl HotpatchModuleCache {
                                     },
                                     is_undefined,
                                     is_weak: false,
+                                    size: 0,
                                 },
                             );
                         }
@@ -155,6 +157,7 @@ impl HotpatchModuleCache {
                                     kind: SymbolKind::Data,
                                     is_undefined,
                                     is_weak: false,
+                                    size: 0,
                                 },
                             );
                         }
@@ -256,6 +259,7 @@ impl HotpatchModuleCache {
                                 is_undefined: s.is_undefined(),
                                 is_weak: s.is_weak(),
                                 kind: s.kind(),
+                                size: s.size(),
                             },
                         ))
                     })
@@ -905,32 +909,7 @@ pub fn create_undefined_symbol_stub(
             // Unfortunately this isn't simply cross-platform, so we need to handle Unix and Windows
             // calling conventions separately. It also depends on the architecture, making it even more
             // complicated.
-            //
-            // Rust code typically generates Tls symbols as functions (text), so we handle them as jumps too.
-            // Figured this out by checking the disassembly of the symbols causing the violation.
-            // ```
-            // __ZN17crossbeam_channel5waker17current_thread_id9THREAD_ID29_$u7b$$u7b$constant$u7d$$u7d$28_$u7b$$u7b$closure$u7d$$u7d$17h33618d877d86bb77E:
-            //    stp     x20, x19, [sp, #-0x20]!
-            //    stp     x29, x30, [sp, #0x10]
-            //    add     x29, sp, #0x10
-            //    adrp    x19, 21603 ; 0x1054bd000
-            //    add     x19, x19, #0x998
-            //    ldr     x20, [x19]
-            //    mov     x0, x19
-            //    blr     x20
-            //    ldr     x8, [x0]
-            //    cbz     x8, 0x10005acc0
-            //    mov     x0, x19
-            //    blr     x20
-            //    ldp     x29, x30, [sp, #0x10]
-            //    ldp     x20, x19, [sp], #0x20
-            //    ret
-            //    mov     x0, x19
-            //    blr     x20
-            //    bl      __ZN3std3sys12thread_local6native4lazy20Storage$LT$T$C$D$GT$10initialize17h818476638edff4e6E
-            //    b       0x10005acac
-            // ```
-            SymbolKind::Text | SymbolKind::Tls => {
+            SymbolKind::Text => {
                 let jump_asm = match triple.operating_system {
                     // The windows ABI and calling convention is different than the SystemV ABI.
                     OperatingSystem::Windows => match triple.architecture {
@@ -1053,6 +1032,75 @@ pub fn create_undefined_symbol_stub(
                 });
             }
 
+            // Rust code typically generates Tls accessors as functions (text), but they are referenced
+            // indirectly as data symbols. We end up handling this by adding the TLS symbol as a data
+            // symbol with the initializer as the address of the original tls initializer. That way
+            // if new TLS are added at runtime, they get initialized properly, but otherwise, the
+            // tls initialization check (cbz) properly skips re-initialization on patches.
+            //
+            // ```
+            // __ZN17crossbeam_channel5waker17current_thread_id9THREAD_ID29_$u7b$$u7b$constant$u7d$$u7d$28_$u7b$$u7b$closure$u7d$$u7d$17h33618d877d86bb77E:
+            //    stp     x20, x19, [sp, #-0x20]!
+            //    stp     x29, x30, [sp, #0x10]
+            //    add     x29, sp, #0x10
+            //    adrp    x19, 21603 ; 0x1054bd000
+            //    add     x19, x19, #0x998
+            //    ldr     x20, [x19]
+            //    mov     x0, x19
+            //    blr     x20
+            //    ldr     x8, [x0]
+            //    cbz     x8, 0x10005acc0
+            //    mov     x0, x19
+            //    blr     x20
+            //    ldp     x29, x30, [sp, #0x10]
+            //    ldp     x20, x19, [sp], #0x20
+            //    ret
+            //    mov     x0, x19
+            //    blr     x20
+            //    bl      __ZN3std3sys12thread_local6native4lazy20Storage$LT$T$C$D$GT$10initialize17h818476638edff4e6E
+            //    b       0x10005acac
+            // ```
+            SymbolKind::Tls => {
+                let tls_section = obj.section_id(StandardSection::Tls);
+
+                let pointer_width = match triple.pointer_width().unwrap() {
+                    PointerWidth::U16 => 2,
+                    PointerWidth::U32 => 4,
+                    PointerWidth::U64 => 8,
+                };
+
+                let size = if sym.size == 0 {
+                    pointer_width
+                } else {
+                    sym.size
+                };
+
+                let align = size.min(pointer_width).next_power_of_two();
+                let mut init = vec![0u8; size as usize];
+
+                // write the contents of the symbol to the init vec
+                init.iter_mut()
+                    .zip(match triple.endianness() {
+                        Ok(target_lexicon::Endianness::Little) => abs_addr.to_le_bytes(),
+                        Ok(target_lexicon::Endianness::Big) => abs_addr.to_be_bytes(),
+                        _ => return Err(PatchError::UnsupportedPlatform(triple.to_string())),
+                    })
+                    .for_each(|(b, v)| *b = v);
+
+                let offset = obj.append_section_data(tls_section, &init, align);
+
+                obj.add_symbol(Symbol {
+                    name: name.as_bytes()[name_offset..].to_vec(),
+                    value: offset, // offset inside .tdata
+                    size,
+                    scope: SymbolScope::Linkage,
+                    kind: SymbolKind::Tls,
+                    weak: false,
+                    section: SymbolSection::Section(tls_section),
+                    flags: object::SymbolFlags::None,
+                });
+            }
+
             // We just assume all non-text symbols are data (globals, statics, etc)
             _ => {
                 // darwin statics show up as "unknown" symbols even though they are data symbols.

+ 33 - 4
packages/subsecond/subsecond/src/lib.rs

@@ -64,7 +64,17 @@
 //! Subsecond is only enabled when debug_assertions are enabled so you can safely ship your application
 //! with Subsecond enabled without worrying about the performance overhead.
 //!
-//! ## Globals and statics
+//! ## Workspace support
+//!
+//! Subsecond currently only patches the "tip" crate - ie the crate in which your `main.rs` is located.
+//! Changes to crates outside this crate will be ignored, which can be confusing. We plan to add full
+//! workspace support in the future, but for now be aware of this limitation. Crate setups that have
+//! a `main.rs` importing a `lib.rs` won't patch sensibly since the crate becomes a library for itself.
+//!
+//! This is due to limitations in rustc itself where the build-graph is non-deterministic and changes
+//! to functions that forward generics can cause a cascade of codegen changes.
+//!
+//! ## Globals, statics, and thread-locals
 //!
 //! Subsecond *does* support hot-reloading of globals, statics, and thread locals. However, there are several limitations:
 //!
@@ -75,6 +85,12 @@
 //! Subsecond purposefully handles statics this way since many libraries like Dioxus and Tokio rely
 //! on persistent global runtimes.
 //!
+//! HUGE WARNING: Currently, thread-locals in the "tip" crate (the one being patched) will seemingly
+//! reset to their initial value on new patches. This is because we don't currently bind thread-locals
+//! in the patches to their original addresses in the main program. If you rely on thread-locals heavily
+//! in your tip crate, you should be aware of this. Sufficiently complex setups might crash or even
+//! segfault. We plan to fix this in the future, but for now, you should be aware of this limitation.
+//!
 //! ## Struct layout and alignment
 //!
 //! Subsecond currently does not support hot-reloading of structs. This is because the generated code
@@ -84,14 +100,27 @@
 //! To mitigate this, framework authors can integrate with Subsecond to either dispose of the old struct
 //! or to re-allocate the struct in a way that is compatible with the new layout. This is called "re-instancing."
 //!
-//! Because Subsecond performs a safe panic if a stale function is called, you should never witness
-//! a crash due to a struct layout change. However, changing a struct's layout will likely cause a
-//! re-instantiation of the struct and potentially a loss of state.
+//! In practice, frameworks that implement subsecond patching properly will throw out the old state
+//! and thus you should never witness a segfault due to misalignment or size changes. Frameworks are
+//! encouraged to aggressively dispose of old state that might cause size and alignment changes.
 //!
 //! We'd like to lift this limitation in the future by providing utilities to re-instantiate structs,
 //! but for now it's up to the framework authors to handle this. For example, Dioxus apps simply throw
 //! out the old state and rebuild it from scratch.
 //!
+//! ## Pointer versioning
+//!
+//! Currently, Subsecond does not "version" function pointers. We have plans to provide this metadata
+//! so framework authors can safely memoize changes without much runtime overhead. Frameworks like
+//! Dioxus and Bevy circumvent this issue by using the TypeID of structs passed to hot functions as
+//! well as the `ptr_address` method on [`HotFn`] to determine if the function pointer has changed.
+//!
+//! Currently, the `ptr_address` method will always return the most up-to-date version of the function
+//! even if the function contents itself did not change. In essence, this is equivalent to a version
+//! of the function where every function is considered "new." This means that framework authors who
+//! integrate re-instancing in their apps might dispose of old state too aggressively. For now, this
+//! is the safer and more practical approach.
+//!
 //! ## Nesting Calls
 //!
 //! Subsecond calls are designed to be nested. This provides clean integration points to know exactly