diff --git a/Marlin/Configuration_adv.h b/Marlin/Configuration_adv.h
index 246664ff2a..c168240e24 100644
--- a/Marlin/Configuration_adv.h
+++ b/Marlin/Configuration_adv.h
@@ -1062,12 +1062,14 @@
  *
  * Zero Vibration (ZV) Input Shaping for X and/or Y movements.
  *
- * This option uses a lot of SRAM for the step buffer, which is proportional
- * to the largest step rate possible for any axis. If the build fails due to
+ * This option uses a lot of SRAM for the step buffer, which is related to the
+ * largest step rate possible for the shaped axes. If the build fails due to
  * low SRAM the buffer size may be reduced by setting smaller values for
- * DEFAULT_AXIS_STEPS_PER_UNIT and/or DEFAULT_MAX_FEEDRATE. Runtime editing
- * of max feedrate (M203) or resonant frequency (M593) may result feedrate
- * being capped to prevent buffer overruns.
+ * DEFAULT_AXIS_STEPS_PER_UNIT and/or DEFAULT_MAX_FEEDRATE. Disabling
+ * ADAPTIVE_STEP_SMOOTHING and reducing the step rate for non-shaped axes may
+ * also reduce the buffer sizes. Runtime editing of max feedrate (M203) or
+ * resonant frequency (M593) may result in input shaping losing effectiveness
+ * during high speed movements to prevent buffer overruns.
  *
  * Tune with M593 D<factor> F<frequency>:
  *
@@ -1077,13 +1079,18 @@
  *  X<1>         Set the given parameters only for the X axis.
  *  Y<1>         Set the given parameters only for the Y axis.
  */
-//#define INPUT_SHAPING
-#if ENABLED(INPUT_SHAPING)
-  #define SHAPING_FREQ_X    40  // (Hz) The dominant resonant frequency of the X axis.
-  #define SHAPING_FREQ_Y    40  // (Hz) The dominant resonant frequency of the Y axis.
-  #define SHAPING_ZETA_X  0.3f  // Damping ratio of the X axis (range: 0.0 = no damping to 1.0 = critical damping).
-  #define SHAPING_ZETA_Y  0.3f  // Damping ratio of the Y axis (range: 0.0 = no damping to 1.0 = critical damping).
-  //#define SHAPING_MENU        // Add a menu to the LCD to set shaping parameters.
+//#define INPUT_SHAPING_X
+//#define INPUT_SHAPING_Y
+#if EITHER(INPUT_SHAPING_X, INPUT_SHAPING_Y)
+  #if ENABLED(INPUT_SHAPING_X)
+    #define SHAPING_FREQ_X  40    // (Hz) The default dominant resonant frequency on the X axis.
+    #define SHAPING_ZETA_X  0.15f // Damping ratio of the X axis (range: 0.0 = no damping to 1.0 = critical damping).
+  #endif
+  #if ENABLED(INPUT_SHAPING_Y)
+    #define SHAPING_FREQ_Y  40    // (Hz) The default dominant resonant frequency on the Y axis.
+    #define SHAPING_ZETA_Y  0.15f // Damping ratio of the Y axis (range: 0.0 = no damping to 1.0 = critical damping).
+  #endif
+  //#define SHAPING_MENU          // Add a menu to the LCD to set shaping parameters.
 #endif
 
 #define AXIS_RELATIVE_MODES { false, false, false, false }
diff --git a/Marlin/src/gcode/feature/input_shaping/M593.cpp b/Marlin/src/gcode/feature/input_shaping/M593.cpp
index e1e99ca51b..040710f3e5 100644
--- a/Marlin/src/gcode/feature/input_shaping/M593.cpp
+++ b/Marlin/src/gcode/feature/input_shaping/M593.cpp
@@ -22,21 +22,21 @@
 
 #include "../../../inc/MarlinConfig.h"
 
-#if ENABLED(INPUT_SHAPING)
+#if HAS_SHAPING
 
 #include "../../gcode.h"
 #include "../../../module/stepper.h"
 
 void GcodeSuite::M593_report(const bool forReplay/*=true*/) {
   report_heading_etc(forReplay, F("Input Shaping"));
-  #if HAS_SHAPING_X
+  #if ENABLED(INPUT_SHAPING_X)
     SERIAL_ECHOLNPGM("  M593 X"
       " F", stepper.get_shaping_frequency(X_AXIS),
       " D", stepper.get_shaping_damping_ratio(X_AXIS)
     );
   #endif
-  #if HAS_SHAPING_Y
-    TERN_(HAS_SHAPING_X, report_echo_start(forReplay));
+  #if ENABLED(INPUT_SHAPING_Y)
+    TERN_(INPUT_SHAPING_X, report_echo_start(forReplay));
     SERIAL_ECHOLNPGM("  M593 Y"
       " F", stepper.get_shaping_frequency(Y_AXIS),
       " D", stepper.get_shaping_damping_ratio(Y_AXIS)
@@ -55,10 +55,10 @@ void GcodeSuite::M593_report(const bool forReplay/*=true*/) {
 void GcodeSuite::M593() {
   if (!parser.seen_any()) return M593_report();
 
-  const bool seen_X = TERN0(HAS_SHAPING_X, parser.seen_test('X')),
-             seen_Y = TERN0(HAS_SHAPING_Y, parser.seen_test('Y')),
-             for_X = seen_X || TERN0(HAS_SHAPING_X, (!seen_X && !seen_Y)),
-             for_Y = seen_Y || TERN0(HAS_SHAPING_Y, (!seen_X && !seen_Y));
+  const bool seen_X = TERN0(INPUT_SHAPING_X, parser.seen_test('X')),
+             seen_Y = TERN0(INPUT_SHAPING_Y, parser.seen_test('Y')),
+             for_X = seen_X || TERN0(INPUT_SHAPING_X, (!seen_X && !seen_Y)),
+             for_Y = seen_Y || TERN0(INPUT_SHAPING_Y, (!seen_X && !seen_Y));
 
   if (parser.seen('D')) {
     const float zeta = parser.value_float();
@@ -72,12 +72,13 @@ void GcodeSuite::M593() {
 
   if (parser.seen('F')) {
     const float freq = parser.value_float();
-    if (freq > 0) {
+    constexpr float min_freq = float(uint32_t(STEPPER_TIMER_RATE) / 2) / shaping_time_t(-2); // Exclusive lower bound; presumably the lowest frequency whose delay still fits in shaping_time_t
+    if (freq == 0.0f || freq > min_freq) { // Accept 0 (disable) or any frequency above the lower bound
       if (for_X) stepper.set_shaping_frequency(X_AXIS, freq);
       if (for_Y) stepper.set_shaping_frequency(Y_AXIS, freq);
     }
     else
-      SERIAL_ECHO_MSG("?Frequency (F) must be greater than 0");
+      SERIAL_ECHOLNPGM("?Frequency (F) must be greater than ", min_freq, " or 0 to disable");
   }
 }
 
diff --git a/Marlin/src/gcode/gcode.cpp b/Marlin/src/gcode/gcode.cpp
index ff066ed678..bb859d8026 100644
--- a/Marlin/src/gcode/gcode.cpp
+++ b/Marlin/src/gcode/gcode.cpp
@@ -933,7 +933,7 @@ void GcodeSuite::process_parsed_command(const bool no_ok/*=false*/) {
         case 575: M575(); break;                                  // M575: Set serial baudrate
       #endif
 
-      #if ENABLED(INPUT_SHAPING)
+      #if HAS_SHAPING
         case 593: M593(); break;                                  // M593: Set Input Shaping parameters
       #endif
 
diff --git a/Marlin/src/gcode/gcode.h b/Marlin/src/gcode/gcode.h
index 0ce8ab3902..5d56e53dd5 100644
--- a/Marlin/src/gcode/gcode.h
+++ b/Marlin/src/gcode/gcode.h
@@ -259,7 +259,7 @@
  * M554 - Get or set IP gateway. (Requires enabled Ethernet port)
  * M569 - Enable stealthChop on an axis. (Requires at least one _DRIVER_TYPE to be TMC2130/2160/2208/2209/5130/5160)
  * M575 - Change the serial baud rate. (Requires BAUD_RATE_GCODE)
- * M593 - Get or set input shaping parameters. (Requires INPUT_SHAPING)
+ * M593 - Get or set input shaping parameters. (Requires INPUT_SHAPING_[XY])
  * M600 - Pause for filament change: "M600 X<pos> Y<pos> Z<raise> E<first_retract> L<later_retract>". (Requires ADVANCED_PAUSE_FEATURE)
  * M603 - Configure filament change: "M603 T<tool> U<unload_length> L<load_length>". (Requires ADVANCED_PAUSE_FEATURE)
  * M605 - Set Dual X-Carriage movement mode: "M605 S<mode> [X<x_offset>] [R<temp_offset>]". (Requires DUAL_X_CARRIAGE)
@@ -1081,7 +1081,7 @@ private:
     static void M575();
   #endif
 
-  #if ENABLED(INPUT_SHAPING)
+  #if HAS_SHAPING
     static void M593();
     static void M593_report(const bool forReplay=true);
   #endif
diff --git a/Marlin/src/inc/Conditionals_adv.h b/Marlin/src/inc/Conditionals_adv.h
index d1b7a342d6..367f7f2324 100644
--- a/Marlin/src/inc/Conditionals_adv.h
+++ b/Marlin/src/inc/Conditionals_adv.h
@@ -1120,15 +1120,11 @@
 #endif
 
 // Input shaping
-#if ENABLED(INPUT_SHAPING)
-  #if !HAS_Y_AXIS
-    #undef SHAPING_FREQ_Y
-    #undef SHAPING_BUFFER_Y
-  #endif
-  #ifdef SHAPING_FREQ_X
-    #define HAS_SHAPING_X 1
-  #endif
-  #ifdef SHAPING_FREQ_Y
-    #define HAS_SHAPING_Y 1
-  #endif
+#if !HAS_Y_AXIS
+  #undef INPUT_SHAPING_Y
+  #undef SHAPING_FREQ_Y
+  #undef SHAPING_BUFFER_Y
+#endif
+#if EITHER(INPUT_SHAPING_X, INPUT_SHAPING_Y)
+  #define HAS_SHAPING 1
 #endif
diff --git a/Marlin/src/inc/SanityCheck.h b/Marlin/src/inc/SanityCheck.h
index b2e4dc9de0..42f1409739 100644
--- a/Marlin/src/inc/SanityCheck.h
+++ b/Marlin/src/inc/SanityCheck.h
@@ -4271,14 +4271,14 @@ static_assert(_PLUS_TEST(4), "HOMING_FEEDRATE_MM_M values must be positive.");
 #endif
 
 // Check requirements for Input Shaping
-#if ENABLED(INPUT_SHAPING) && defined(__AVR__)
-  #if HAS_SHAPING_X
+#if HAS_SHAPING && defined(__AVR__)
+  #if ENABLED(INPUT_SHAPING_X)
     #if F_CPU > 16000000
       static_assert((SHAPING_FREQ_X) * 2 * 0x10000 >= (STEPPER_TIMER_RATE), "SHAPING_FREQ_X is below the minimum (20) for AVR 20MHz.");
     #else
       static_assert((SHAPING_FREQ_X) * 2 * 0x10000 >= (STEPPER_TIMER_RATE), "SHAPING_FREQ_X is below the minimum (16) for AVR 16MHz.");
     #endif
-  #elif HAS_SHAPING_Y
+  #elif ENABLED(INPUT_SHAPING_Y)
     #if F_CPU > 16000000
       static_assert((SHAPING_FREQ_Y) * 2 * 0x10000 >= (STEPPER_TIMER_RATE), "SHAPING_FREQ_Y is below the minimum (20) for AVR 20MHz.");
     #else
@@ -4287,12 +4287,8 @@ static_assert(_PLUS_TEST(4), "HOMING_FEEDRATE_MM_M values must be positive.");
   #endif
 #endif
 
-#if ENABLED(INPUT_SHAPING)
-  #if ENABLED(DIRECT_STEPPING)
-    #error "INPUT_SHAPING cannot currently be used with DIRECT_STEPPING."
-  #elif ENABLED(LASER_FEATURE)
-    #error "INPUT_SHAPING cannot currently be used with LASER_FEATURE."
-  #endif
+#if BOTH(HAS_SHAPING, DIRECT_STEPPING)
+  #error "INPUT_SHAPING_[XY] cannot currently be used with DIRECT_STEPPING."
 #endif
 
 // Misc. Cleanup
diff --git a/Marlin/src/lcd/language/language_en.h b/Marlin/src/lcd/language/language_en.h
index 2ecf2def12..45861a8252 100644
--- a/Marlin/src/lcd/language/language_en.h
+++ b/Marlin/src/lcd/language/language_en.h
@@ -403,10 +403,10 @@ namespace Language_en {
   LSTR MSG_A_RETRACT                      = _UxGT("Retract Accel");
   LSTR MSG_A_TRAVEL                       = _UxGT("Travel Accel");
   LSTR MSG_INPUT_SHAPING                  = _UxGT("Input Shaping");
-  LSTR MSG_SHAPING_X_FREQ                 = STR_X _UxGT(" frequency");
-  LSTR MSG_SHAPING_Y_FREQ                 = STR_Y _UxGT(" frequency");
-  LSTR MSG_SHAPING_X_ZETA                 = STR_X _UxGT(" damping");
-  LSTR MSG_SHAPING_Y_ZETA                 = STR_Y _UxGT(" damping");
+  LSTR MSG_SHAPING_ENABLE                 = _UxGT("Enable @ shaping");
+  LSTR MSG_SHAPING_DISABLE                = _UxGT("Disable @ shaping");
+  LSTR MSG_SHAPING_FREQ                   = _UxGT("@ frequency");
+  LSTR MSG_SHAPING_ZETA                   = _UxGT("@ damping");
   LSTR MSG_XY_FREQUENCY_LIMIT             = _UxGT("XY Freq Limit");
   LSTR MSG_XY_FREQUENCY_FEEDRATE          = _UxGT("Min FR Factor");
   LSTR MSG_STEPS_PER_MM                   = _UxGT("Steps/mm");
diff --git a/Marlin/src/lcd/menu/menu_advanced.cpp b/Marlin/src/lcd/menu/menu_advanced.cpp
index 9d6d79efd7..875e74e8bb 100644
--- a/Marlin/src/lcd/menu/menu_advanced.cpp
+++ b/Marlin/src/lcd/menu/menu_advanced.cpp
@@ -545,24 +545,28 @@ void menu_backlash();
       START_MENU();
       BACK_ITEM(MSG_ADVANCED_SETTINGS);
 
-      // M593 F Frequency
-      #if HAS_SHAPING_X
+      // M593 F Frequency and D Damping ratio
+      #if ENABLED(INPUT_SHAPING_X)
         editable.decimal = stepper.get_shaping_frequency(X_AXIS);
-        EDIT_ITEM_FAST(float61, MSG_SHAPING_X_FREQ, &editable.decimal, min_frequency, 200.0f, []{ stepper.set_shaping_frequency(X_AXIS, editable.decimal); });
+        if (editable.decimal) {
+          ACTION_ITEM_N(X_AXIS, MSG_SHAPING_DISABLE, []{ stepper.set_shaping_frequency(X_AXIS, 0.0f); });
+          EDIT_ITEM_FAST_N(float61, X_AXIS, MSG_SHAPING_FREQ, &editable.decimal, min_frequency, 200.0f, []{ stepper.set_shaping_frequency(X_AXIS, editable.decimal); });
+          editable.decimal = stepper.get_shaping_damping_ratio(X_AXIS);
+          EDIT_ITEM_FAST_N(float42_52, X_AXIS, MSG_SHAPING_ZETA, &editable.decimal, 0.0f, 1.0f, []{ stepper.set_shaping_damping_ratio(X_AXIS, editable.decimal); });
+        }
+        else
+          ACTION_ITEM_N(X_AXIS, MSG_SHAPING_ENABLE, []{ stepper.set_shaping_frequency(X_AXIS, SHAPING_FREQ_X); });
       #endif
-      #if HAS_SHAPING_Y
+      #if ENABLED(INPUT_SHAPING_Y)
         editable.decimal = stepper.get_shaping_frequency(Y_AXIS);
-        EDIT_ITEM_FAST(float61, MSG_SHAPING_Y_FREQ, &editable.decimal, min_frequency, 200.0f, []{ stepper.set_shaping_frequency(Y_AXIS, editable.decimal); });
-      #endif
-
-      // M593 D Damping ratio
-      #if HAS_SHAPING_X
-        editable.decimal = stepper.get_shaping_damping_ratio(X_AXIS);
-        EDIT_ITEM_FAST(float42_52, MSG_SHAPING_X_ZETA, &editable.decimal, 0.0f, 1.0f, []{ stepper.set_shaping_damping_ratio(X_AXIS, editable.decimal); });
-      #endif
-      #if HAS_SHAPING_Y
-        editable.decimal = stepper.get_shaping_damping_ratio(Y_AXIS);
-        EDIT_ITEM_FAST(float42_52, MSG_SHAPING_Y_ZETA, &editable.decimal, 0.0f, 1.0f, []{ stepper.set_shaping_damping_ratio(Y_AXIS, editable.decimal); });
+        if (editable.decimal) {
+          ACTION_ITEM_N(Y_AXIS, MSG_SHAPING_DISABLE, []{ stepper.set_shaping_frequency(Y_AXIS, 0.0f); });
+          EDIT_ITEM_FAST_N(float61, Y_AXIS, MSG_SHAPING_FREQ, &editable.decimal, min_frequency, 200.0f, []{ stepper.set_shaping_frequency(Y_AXIS, editable.decimal); });
+          editable.decimal = stepper.get_shaping_damping_ratio(Y_AXIS);
+          EDIT_ITEM_FAST_N(float42_52, Y_AXIS, MSG_SHAPING_ZETA, &editable.decimal, 0.0f, 1.0f, []{ stepper.set_shaping_damping_ratio(Y_AXIS, editable.decimal); });
+        }
+        else
+          ACTION_ITEM_N(Y_AXIS, MSG_SHAPING_ENABLE, []{ stepper.set_shaping_frequency(Y_AXIS, SHAPING_FREQ_Y); });
       #endif
 
       END_MENU();
diff --git a/Marlin/src/module/planner.cpp b/Marlin/src/module/planner.cpp
index 0128d90f0f..ed85045098 100644
--- a/Marlin/src/module/planner.cpp
+++ b/Marlin/src/module/planner.cpp
@@ -1724,6 +1724,13 @@ float Planner::triggered_position_mm(const AxisEnum axis) {
   return result * mm_per_step[axis];
 }
 
+bool Planner::busy() { // True if any of the conditions below holds
+  return (has_blocks_queued() || cleaning_buffer_counter // Blocks are queued, or the cleaning counter is still nonzero
+      || TERN0(EXTERNAL_CLOSED_LOOP_CONTROLLER, CLOSED_LOOP_WAITING()) // With EXTERNAL_CLOSED_LOOP_CONTROLLER: the closed loop controller is waiting
+      || TERN0(HAS_SHAPING, stepper.input_shaping_busy()) // With HAS_SHAPING: the stepper reports input shaping as busy (presumably echoes still pending - confirm in stepper.h)
+  );
+}
+
 void Planner::finish_and_disable() {
   while (has_blocks_queued() || cleaning_buffer_counter) idle();
   stepper.disable_all_steppers();
@@ -2483,14 +2490,6 @@ bool Planner::_populate_block(
 
   #endif // XY_FREQUENCY_LIMIT
 
-  #if ENABLED(INPUT_SHAPING)
-    const float top_freq = _MIN(float(0x7FFFFFFFL)
-                                OPTARG(HAS_SHAPING_X, stepper.get_shaping_frequency(X_AXIS))
-                                OPTARG(HAS_SHAPING_Y, stepper.get_shaping_frequency(Y_AXIS))),
-                max_factor = (top_freq * float(shaping_dividends - 3) * 2.0f) / block->nominal_rate;
-    NOMORE(speed_factor, max_factor);
-  #endif
-
   // Correct the speed
   if (speed_factor < 1.0f) {
     current_speed *= speed_factor;
diff --git a/Marlin/src/module/planner.h b/Marlin/src/module/planner.h
index 32b5a8795b..dcfdb1c28e 100644
--- a/Marlin/src/module/planner.h
+++ b/Marlin/src/module/planner.h
@@ -930,11 +930,7 @@ class Planner {
     static float triggered_position_mm(const AxisEnum axis);
 
     // Blocks are queued, or we're running out moves, or the closed loop controller is waiting
-    static bool busy() {
-      return (has_blocks_queued() || cleaning_buffer_counter
-          || TERN0(EXTERNAL_CLOSED_LOOP_CONTROLLER, CLOSED_LOOP_WAITING())
-      );
-    }
+    static bool busy();
 
     // Block until all buffered steps are executed / cleaned
     static void synchronize();
diff --git a/Marlin/src/module/settings.cpp b/Marlin/src/module/settings.cpp
index 921b0d91cb..4ae4c19922 100644
--- a/Marlin/src/module/settings.cpp
+++ b/Marlin/src/module/settings.cpp
@@ -580,11 +580,11 @@ typedef struct SettingsDataStruct {
   //
   // Input Shaping
   //
-  #if HAS_SHAPING_X
+  #if ENABLED(INPUT_SHAPING_X)
     float shaping_x_frequency, // M593 X F
           shaping_x_zeta;      // M593 X D
   #endif
-  #if HAS_SHAPING_Y
+  #if ENABLED(INPUT_SHAPING_Y)
     float shaping_y_frequency, // M593 Y F
           shaping_y_zeta;      // M593 Y D
   #endif
@@ -1617,12 +1617,12 @@ void MarlinSettings::postprocess() {
     //
     // Input Shaping
     ///
-    #if ENABLED(INPUT_SHAPING)
-      #if HAS_SHAPING_X
+    #if HAS_SHAPING
+      #if ENABLED(INPUT_SHAPING_X)
         EEPROM_WRITE(stepper.get_shaping_frequency(X_AXIS));
         EEPROM_WRITE(stepper.get_shaping_damping_ratio(X_AXIS));
       #endif
-      #if HAS_SHAPING_Y
+      #if ENABLED(INPUT_SHAPING_Y)
         EEPROM_WRITE(stepper.get_shaping_frequency(Y_AXIS));
         EEPROM_WRITE(stepper.get_shaping_damping_ratio(Y_AXIS));
       #endif
@@ -2602,7 +2602,7 @@ void MarlinSettings::postprocess() {
       //
       // Input Shaping
       //
-      #if HAS_SHAPING_X
+      #if ENABLED(INPUT_SHAPING_X)
       {
         float _data[2];
         EEPROM_READ(_data);
@@ -2611,7 +2611,7 @@ void MarlinSettings::postprocess() {
       }
       #endif
 
-      #if HAS_SHAPING_Y
+      #if ENABLED(INPUT_SHAPING_Y)
       {
         float _data[2];
         EEPROM_READ(_data);
@@ -3389,12 +3389,12 @@ void MarlinSettings::reset() {
   //
   // Input Shaping
   //
-  #if ENABLED(INPUT_SHAPING)
-    #if HAS_SHAPING_X
+  #if HAS_SHAPING
+    #if ENABLED(INPUT_SHAPING_X)
       stepper.set_shaping_frequency(X_AXIS, SHAPING_FREQ_X);
       stepper.set_shaping_damping_ratio(X_AXIS, SHAPING_ZETA_X);
     #endif
-    #if HAS_SHAPING_Y
+    #if ENABLED(INPUT_SHAPING_Y)
       stepper.set_shaping_frequency(Y_AXIS, SHAPING_FREQ_Y);
       stepper.set_shaping_damping_ratio(Y_AXIS, SHAPING_ZETA_Y);
     #endif
@@ -3650,7 +3650,7 @@ void MarlinSettings::reset() {
     //
     // Input Shaping
     //
-    TERN_(INPUT_SHAPING, gcode.M593_report(forReplay));
+    TERN_(HAS_SHAPING, gcode.M593_report(forReplay));
 
     //
     // Linear Advance
diff --git a/Marlin/src/module/stepper.cpp b/Marlin/src/module/stepper.cpp
index 6cc40ccece..74e761dc64 100644
--- a/Marlin/src/module/stepper.cpp
+++ b/Marlin/src/module/stepper.cpp
@@ -232,17 +232,25 @@ uint32_t Stepper::advance_divisor = 0,
            Stepper::la_advance_steps = 0;
 #endif
 
-#if ENABLED(INPUT_SHAPING)
-  shaping_time_t                DelayTimeManager::now = 0;
-  ParamDelayQueue               Stepper::shaping_dividend_queue;
-  DelayQueue<shaping_dividends> Stepper::shaping_queue;
-  #if HAS_SHAPING_X
-    shaping_time_t DelayTimeManager::delay_x;
-    ShapeParams Stepper::shaping_x;
+#if HAS_SHAPING
+  shaping_time_t      ShapingQueue::now = 0;
+  shaping_time_t      ShapingQueue::times[shaping_echoes];
+  shaping_echo_axis_t ShapingQueue::echo_axes[shaping_echoes];
+  uint16_t            ShapingQueue::tail = 0;
+
+  #if ENABLED(INPUT_SHAPING_X)
+    shaping_time_t  ShapingQueue::delay_x;
+    shaping_time_t  ShapingQueue::peek_x_val = shaping_time_t(-1);
+    uint16_t        ShapingQueue::head_x = 0;
+    uint16_t        ShapingQueue::_free_count_x = shaping_echoes - 1;
+    ShapeParams     Stepper::shaping_x;
   #endif
-  #if HAS_SHAPING_Y
-    shaping_time_t DelayTimeManager::delay_y;
-    ShapeParams Stepper::shaping_y;
+  #if ENABLED(INPUT_SHAPING_Y)
+    shaping_time_t  ShapingQueue::delay_y;
+    shaping_time_t  ShapingQueue::peek_y_val = shaping_time_t(-1);
+    uint16_t        ShapingQueue::head_y = 0;
+    uint16_t        ShapingQueue::_free_count_y = shaping_echoes - 1;
+    ShapeParams     Stepper::shaping_y;
   #endif
 #endif
 
@@ -1479,20 +1487,10 @@ void Stepper::isr() {
     // Enable ISRs to reduce USART processing latency
     hal.isr_on();
 
-    #if ENABLED(INPUT_SHAPING)
-      // Speed limiting should ensure the buffers never get full. But if somehow they do, stutter rather than overflow.
-      if (!nextMainISR) {
-        TERN_(HAS_SHAPING_X, if (shaping_dividend_queue.free_count_x() == 0) nextMainISR = shaping_dividend_queue.peek_x() + 1);
-        TERN_(HAS_SHAPING_Y, if (shaping_dividend_queue.free_count_y() == 0) NOLESS(nextMainISR, shaping_dividend_queue.peek_y() + 1));
-        TERN_(HAS_SHAPING_X, if (shaping_queue.free_count_x() < steps_per_isr) NOLESS(nextMainISR, shaping_queue.peek_x() + 1));
-        TERN_(HAS_SHAPING_Y, if (shaping_queue.free_count_y() < steps_per_isr) NOLESS(nextMainISR, shaping_queue.peek_y() + 1));
-      }
-    #endif
+    TERN_(HAS_SHAPING, shaping_isr());                  // Do Shaper stepping, if needed
 
     if (!nextMainISR) pulse_phase_isr();                // 0 = Do coordinated axes Stepper pulses
 
-    TERN_(INPUT_SHAPING, shaping_isr());                // Do Shaper stepping, if needed
-
     #if ENABLED(LIN_ADVANCE)
       if (!nextAdvanceISR) {                            // 0 = Do Linear Advance E Stepper pulses
         advance_isr();
@@ -1523,10 +1521,8 @@ void Stepper::isr() {
     const uint32_t interval = _MIN(
       uint32_t(HAL_TIMER_TYPE_MAX),                           // Come back in a very long time
       nextMainISR                                             // Time until the next Pulse / Block phase
-      OPTARG(HAS_SHAPING_X, shaping_dividend_queue.peek_x())  // Time until next input shaping dividend change for X
-      OPTARG(HAS_SHAPING_Y, shaping_dividend_queue.peek_y())  // Time until next input shaping dividend change for Y
-      OPTARG(HAS_SHAPING_X, shaping_queue.peek_x())           // Time until next input shaping echo for X
-      OPTARG(HAS_SHAPING_Y, shaping_queue.peek_y())           // Time until next input shaping echo for Y
+      OPTARG(INPUT_SHAPING_X, ShapingQueue::peek_x())         // Time until next input shaping echo for X
+      OPTARG(INPUT_SHAPING_Y, ShapingQueue::peek_y())         // Time until next input shaping echo for Y
       OPTARG(LIN_ADVANCE, nextAdvanceISR)                     // Come back early for Linear Advance?
       OPTARG(INTEGRATED_BABYSTEPPING, nextBabystepISR)        // Come back early for Babystepping?
     );
@@ -1539,16 +1535,9 @@ void Stepper::isr() {
     //
 
     nextMainISR -= interval;
-
-    TERN_(INPUT_SHAPING, DelayTimeManager::decrement_delays(interval));
-
-    #if ENABLED(LIN_ADVANCE)
-      if (nextAdvanceISR != LA_ADV_NEVER) nextAdvanceISR -= interval;
-    #endif
-
-    #if ENABLED(INTEGRATED_BABYSTEPPING)
-      if (nextBabystepISR != BABYSTEP_NEVER) nextBabystepISR -= interval;
-    #endif
+    TERN_(HAS_SHAPING, ShapingQueue::decrement_delays(interval));
+    TERN_(LIN_ADVANCE, if (nextAdvanceISR != LA_ADV_NEVER) nextAdvanceISR -= interval);
+    TERN_(INTEGRATED_BABYSTEPPING, if (nextBabystepISR != BABYSTEP_NEVER) nextBabystepISR -= interval);
 
     /**
      * This needs to avoid a race-condition caused by interleaving
@@ -1636,11 +1625,16 @@ void Stepper::pulse_phase_isr() {
     abort_current_block = false;
     if (current_block) {
       discard_current_block();
-      #if ENABLED(INPUT_SHAPING)
-        shaping_dividend_queue.purge();
-        shaping_queue.purge();
-        TERN_(HAS_SHAPING_X, delta_error.x = 0);
-        TERN_(HAS_SHAPING_Y, delta_error.y = 0);
+      #if HAS_SHAPING
+        ShapingQueue::purge();
+        #if ENABLED(INPUT_SHAPING_X)
+          shaping_x.delta_error = 0;
+          shaping_x.last_block_end_pos = count_position.x;
+        #endif
+        #if ENABLED(INPUT_SHAPING_Y)
+          shaping_y.delta_error = 0;
+          shaping_y.last_block_end_pos = count_position.y;
+        #endif
       #endif
     }
   }
@@ -1676,31 +1670,48 @@ void Stepper::pulse_phase_isr() {
     #define PULSE_PREP(AXIS) do{ \
       delta_error[_AXIS(AXIS)] += advance_dividend[_AXIS(AXIS)]; \
       step_needed[_AXIS(AXIS)] = (delta_error[_AXIS(AXIS)] >= 0); \
-      if (step_needed[_AXIS(AXIS)]) { \
-        count_position[_AXIS(AXIS)] += count_direction[_AXIS(AXIS)]; \
+      if (step_needed[_AXIS(AXIS)]) \
         delta_error[_AXIS(AXIS)] -= advance_divisor; \
-      } \
     }while(0)
 
-    #define PULSE_PREP_SHAPING(AXIS, DIVIDEND) do{ \
-      delta_error[_AXIS(AXIS)] += (DIVIDEND); \
-      if ((MAXDIR(AXIS) && delta_error[_AXIS(AXIS)] <= -0x30000000L) || (MINDIR(AXIS) && delta_error[_AXIS(AXIS)] >= 0x30000000L)) { \
-        TBI(last_direction_bits, _AXIS(AXIS)); \
-        DIR_WAIT_BEFORE(); \
-        SET_STEP_DIR(AXIS); \
-        DIR_WAIT_AFTER(); \
-      } \
-      step_needed[_AXIS(AXIS)] = (MAXDIR(AXIS) && delta_error[_AXIS(AXIS)] >= 0x10000000L) || \
-                                 (MINDIR(AXIS) && delta_error[_AXIS(AXIS)] <= -0x10000000L); \
+    // With input shaping, direction changes can happen with little more than
+    // AWAIT_LOW_PULSE() and DIR_WAIT_BEFORE() between steps. To work around
+    // the TMC2208 / TMC2225 shutdown bug (#16076), add a half step hysteresis
+    // in each direction. This results in the position being off by half a
+    // step on average during travel but correct at the end of each segment.
+    #if AXIS_DRIVER_TYPE_X(TMC2208) || AXIS_DRIVER_TYPE_X(TMC2208_STANDALONE)
+      #define HYSTERESIS_X 64
+    #else
+      #define HYSTERESIS_X 0
+    #endif
+    #if AXIS_DRIVER_TYPE_Y(TMC2208) || AXIS_DRIVER_TYPE_Y(TMC2208_STANDALONE)
+      #define HYSTERESIS_Y 64
+    #else
+      #define HYSTERESIS_Y 0
+    #endif
+    #define _HYSTERESIS(AXIS) HYSTERESIS_##AXIS
+    #define HYSTERESIS(AXIS) _HYSTERESIS(AXIS)
+
+    #define PULSE_PREP_SHAPING(AXIS, DELTA_ERROR, DIVIDEND) do{ \
       if (step_needed[_AXIS(AXIS)]) { \
-        count_position[_AXIS(AXIS)] += count_direction[_AXIS(AXIS)]; \
-        delta_error[_AXIS(AXIS)] += MAXDIR(AXIS) ? -0x20000000L : 0x20000000L; \
+        DELTA_ERROR += (DIVIDEND); \
+        if ((MAXDIR(AXIS) && DELTA_ERROR <= -(64 + HYSTERESIS(AXIS))) || (MINDIR(AXIS) && DELTA_ERROR >= (64 + HYSTERESIS(AXIS)))) { \
+          { USING_TIMED_PULSE(); START_TIMED_PULSE(); AWAIT_LOW_PULSE(); } \
+          TBI(last_direction_bits, _AXIS(AXIS)); \
+          DIR_WAIT_BEFORE(); \
+          SET_STEP_DIR(AXIS); \
+          DIR_WAIT_AFTER(); \
+        } \
+        step_needed[_AXIS(AXIS)] = DELTA_ERROR <= -(64 + HYSTERESIS(AXIS)) || DELTA_ERROR >= (64 + HYSTERESIS(AXIS)); \
+        if (step_needed[_AXIS(AXIS)]) \
+          DELTA_ERROR += MAXDIR(AXIS) ? -128 : 128; \
       } \
     }while(0)
 
     // Start an active pulse if needed
     #define PULSE_START(AXIS) do{ \
       if (step_needed[_AXIS(AXIS)]) { \
+        count_position[_AXIS(AXIS)] += count_direction[_AXIS(AXIS)]; \
         _APPLY_STEP(AXIS, !_INVERT_STEP_PIN(AXIS), 0); \
       } \
     }while(0)
@@ -1819,22 +1830,12 @@ void Stepper::pulse_phase_isr() {
     #endif // DIRECT_STEPPING
 
     if (!is_page) {
-      TERN_(INPUT_SHAPING, shaping_queue.enqueue());
-
       // Determine if pulses are needed
       #if HAS_X_STEP
-        #if HAS_SHAPING_X
-          PULSE_PREP_SHAPING(X, advance_dividend.x);
-        #else
-          PULSE_PREP(X);
-        #endif
+        PULSE_PREP(X);
       #endif
       #if HAS_Y_STEP
-        #if HAS_SHAPING_Y
-          PULSE_PREP_SHAPING(Y, advance_dividend.y);
-        #else
-          PULSE_PREP(Y);
-        #endif
+        PULSE_PREP(Y);
       #endif
       #if HAS_Z_STEP
         PULSE_PREP(Z);
@@ -1871,6 +1872,24 @@ void Stepper::pulse_phase_isr() {
           }
         #endif
       #endif
+
+      #if HAS_SHAPING
+        // record an echo if a step is needed in the primary bresenham
+        const bool x_step = TERN0(INPUT_SHAPING_X, shaping_x.enabled && step_needed[X_AXIS]),
+                   y_step = TERN0(INPUT_SHAPING_Y, shaping_y.enabled && step_needed[Y_AXIS]);
+        if (x_step || y_step)
+          ShapingQueue::enqueue(x_step, TERN0(INPUT_SHAPING_X, shaping_x.forward), y_step, TERN0(INPUT_SHAPING_Y, shaping_y.forward));
+
+        // do the first part of the secondary bresenham
+        #if ENABLED(INPUT_SHAPING_X)
+          if (shaping_x.enabled)
+            PULSE_PREP_SHAPING(X, shaping_x.delta_error, shaping_x.factor1 * (shaping_x.forward ? 1 : -1));
+        #endif
+        #if ENABLED(INPUT_SHAPING_Y)
+          if (shaping_y.enabled)
+            PULSE_PREP_SHAPING(Y, shaping_y.delta_error, shaping_y.factor1 * (shaping_y.forward ? 1 : -1));
+        #endif
+      #endif
     }
 
     #if ISR_MULTI_STEPS
@@ -1910,7 +1929,10 @@ void Stepper::pulse_phase_isr() {
     #endif
 
     #if ENABLED(MIXING_EXTRUDER)
-      if (step_needed.e) E_STEP_WRITE(mixer.get_next_stepper(), !INVERT_E_STEP_PIN);
+      if (step_needed.e) {
+        count_position[E_AXIS] += count_direction[E_AXIS];
+        E_STEP_WRITE(mixer.get_next_stepper(), !INVERT_E_STEP_PIN);
+      }
     #elif HAS_E0_STEP
       PULSE_START(E);
     #endif
@@ -1965,55 +1987,59 @@ void Stepper::pulse_phase_isr() {
   } while (--events_to_do);
 }
 
-#if ENABLED(INPUT_SHAPING)
+#if HAS_SHAPING
 
   void Stepper::shaping_isr() {
-    xyze_bool_t step_needed{0};
+    xy_bool_t step_needed{0};
 
-    const bool shapex = TERN0(HAS_SHAPING_X, !shaping_queue.peek_x()),
-               shapey = TERN0(HAS_SHAPING_Y, !shaping_queue.peek_y());
+    // Clear the echoes that are ready to process. If the buffers are too full and risk overflow, also apply echoes early.
+    TERN_(INPUT_SHAPING_X, step_needed[X_AXIS] = !ShapingQueue::peek_x() || ShapingQueue::free_count_x() < steps_per_isr);
+    TERN_(INPUT_SHAPING_Y, step_needed[Y_AXIS] = !ShapingQueue::peek_y() || ShapingQueue::free_count_y() < steps_per_isr);
 
-    #if HAS_SHAPING_X
-      if (!shaping_dividend_queue.peek_x()) shaping_x.dividend = shaping_dividend_queue.dequeue_x();
-    #endif
-    #if HAS_SHAPING_Y
-      if (!shaping_dividend_queue.peek_y()) shaping_y.dividend = shaping_dividend_queue.dequeue_y();
-    #endif
+    if (bool(step_needed)) while (true) {
+      #if ENABLED(INPUT_SHAPING_X)
+        if (step_needed[X_AXIS]) {
+          const bool forward = ShapingQueue::dequeue_x();
+          PULSE_PREP_SHAPING(X, shaping_x.delta_error, shaping_x.factor2 * (forward ? 1 : -1));
+          PULSE_START(X);
+        }
+      #endif
 
-    #if HAS_SHAPING_X
-      if (shapex) {
-        shaping_queue.dequeue_x();
-        PULSE_PREP_SHAPING(X, shaping_x.dividend);
-        PULSE_START(X);
+      #if ENABLED(INPUT_SHAPING_Y)
+        if (step_needed[Y_AXIS]) {
+          const bool forward = ShapingQueue::dequeue_y();
+          PULSE_PREP_SHAPING(Y, shaping_y.delta_error, shaping_y.factor2 * (forward ? 1 : -1));
+          PULSE_START(Y);
+        }
+      #endif
+
+      TERN_(I2S_STEPPER_STREAM, i2s_push_sample());
+
+      USING_TIMED_PULSE();
+      if (bool(step_needed)) {
+        #if ISR_MULTI_STEPS
+          START_TIMED_PULSE();
+          AWAIT_HIGH_PULSE();
+        #endif
+        #if ENABLED(INPUT_SHAPING_X)
+          PULSE_STOP(X);
+        #endif
+        #if ENABLED(INPUT_SHAPING_Y)
+          PULSE_STOP(Y);
+        #endif
       }
-    #endif
 
-    #if HAS_SHAPING_Y
-      if (shapey) {
-        shaping_queue.dequeue_y();
-        PULSE_PREP_SHAPING(Y, shaping_y.dividend);
-        PULSE_START(Y);
-      }
-    #endif
+      TERN_(INPUT_SHAPING_X, step_needed[X_AXIS] = !ShapingQueue::peek_x() || ShapingQueue::free_count_x() < steps_per_isr);
+      TERN_(INPUT_SHAPING_Y, step_needed[Y_AXIS] = !ShapingQueue::peek_y() || ShapingQueue::free_count_y() < steps_per_isr);
 
-    TERN_(I2S_STEPPER_STREAM, i2s_push_sample());
+      if (!bool(step_needed)) break;
 
-    if (shapex || shapey) {
-      #if ISR_MULTI_STEPS
-        USING_TIMED_PULSE();
-        START_TIMED_PULSE();
-        AWAIT_HIGH_PULSE();
-      #endif
-      #if HAS_SHAPING_X
-        if (shapex) PULSE_STOP(X);
-      #endif
-      #if HAS_SHAPING_Y
-        if (shapey) PULSE_STOP(Y);
-      #endif
+      START_TIMED_PULSE();
+      AWAIT_LOW_PULSE();
     }
   }
 
-#endif // INPUT_SHAPING
+#endif // HAS_SHAPING
 
 // Calculate timer interval, with all limits applied.
 uint32_t Stepper::calc_timer_interval(uint32_t step_rate) {
@@ -2462,79 +2488,55 @@ uint32_t Stepper::block_phase_isr() {
       acceleration_time = deceleration_time = 0;
 
       #if ENABLED(ADAPTIVE_STEP_SMOOTHING)
-        uint8_t oversampling = 0;                           // Assume no axis smoothing (via oversampling)
+        oversampling_factor = 0;                            // Assume no axis smoothing (via oversampling)
         // Decide if axis smoothing is possible
         uint32_t max_rate = current_block->nominal_rate;    // Get the step event rate
         while (max_rate < MIN_STEP_ISR_FREQUENCY) {         // As long as more ISRs are possible...
           max_rate <<= 1;                                   // Try to double the rate
           if (max_rate < MIN_STEP_ISR_FREQUENCY)            // Don't exceed the estimated ISR limit
-            ++oversampling;                                 // Increase the oversampling (used for left-shift)
+            ++oversampling_factor;                          // Increase the oversampling (used for left-shift)
         }
-        oversampling_factor = oversampling;                 // For all timer interval calculations
-      #else
-        constexpr uint8_t oversampling = 0;
       #endif
 
       // Based on the oversampling factor, do the calculations
-      step_event_count = current_block->step_event_count << oversampling;
+      step_event_count = current_block->step_event_count << oversampling_factor;
 
       // Initialize Bresenham delta errors to 1/2
-      #if HAS_SHAPING_X
-        const int32_t old_delta_error_x = delta_error.x;
-      #endif
-      #if HAS_SHAPING_Y
-        const int32_t old_delta_error_y = delta_error.y;
-      #endif
       delta_error = TERN_(LIN_ADVANCE, la_delta_error =) -int32_t(step_event_count);
 
       // Calculate Bresenham dividends and divisors
       advance_dividend = (current_block->steps << 1).asLong();
       advance_divisor = step_event_count << 1;
 
-      // for input shaped axes, advance_divisor is replaced with 0x40000000
-      // and steps are repeated twice so dividends have to be scaled and halved
-      // and the dividend is directional, i.e. signed
-      TERN_(HAS_SHAPING_X, advance_dividend.x = (uint64_t(current_block->steps.x) << 29) / step_event_count);
-      TERN_(HAS_SHAPING_X, if (TEST(current_block->direction_bits, X_AXIS)) advance_dividend.x *= -1);
-      TERN_(HAS_SHAPING_X, if (!shaping_queue.empty_x()) SET_BIT_TO(current_block->direction_bits, X_AXIS, TEST(last_direction_bits, X_AXIS)));
-      TERN_(HAS_SHAPING_Y, advance_dividend.y = (uint64_t(current_block->steps.y) << 29) / step_event_count);
-      TERN_(HAS_SHAPING_Y, if (TEST(current_block->direction_bits, Y_AXIS)) advance_dividend.y *= -1);
-      TERN_(HAS_SHAPING_Y, if (!shaping_queue.empty_y()) SET_BIT_TO(current_block->direction_bits, Y_AXIS, TEST(last_direction_bits, Y_AXIS)));
+      #if ENABLED(INPUT_SHAPING_X)
+        if (shaping_x.enabled) {
+          const int64_t steps = TEST(current_block->direction_bits, X_AXIS) ? -int64_t(current_block->steps.x) : int64_t(current_block->steps.x);
+          shaping_x.last_block_end_pos += steps;
 
-      // The scaling operation above introduces rounding errors which must now be removed.
-      // For this segment, there will be step_event_count calls to the Bresenham logic and the same number of echoes.
-      // For each pair of calls to the Bresenham logic, delta_error will increase by advance_dividend modulo 0x20000000
-      // so (e.g. for x) delta_error.x will end up changing by (advance_dividend.x * step_event_count) % 0x20000000.
-      // For a divisor which is a power of 2, modulo is the same as as a bitmask, i.e.
-      // (advance_dividend.x * step_event_count) & 0x1FFFFFFF.
-      // This segment's final change in delta_error should actually be zero so we need to increase delta_error by
-      // 0 - ((advance_dividend.x * step_event_count) & 0x1FFFFFFF)
-      // And this needs to be adjusted to the range -0x10000000 to 0x10000000.
-      // Adding and subtracting 0x10000000 inside the outside the modulo achieves this.
-      TERN_(HAS_SHAPING_X, delta_error.x = old_delta_error_x + 0x10000000L - ((0x10000000L + advance_dividend.x * step_event_count) & 0x1FFFFFFFUL));
-      TERN_(HAS_SHAPING_Y, delta_error.y = old_delta_error_y + 0x10000000L - ((0x10000000L + advance_dividend.y * step_event_count) & 0x1FFFFFFFUL));
-
-      // when there is damping, the signal and its echo have different amplitudes
-      #if ENABLED(HAS_SHAPING_X)
-        const int32_t echo_x = shaping_x.factor * (advance_dividend.x >> 7);
-      #endif
-      #if ENABLED(HAS_SHAPING_Y)
-        const int32_t echo_y = shaping_y.factor * (advance_dividend.y >> 7);
+          // If any echoes remain unprocessed, then the direction change must be
+          // delayed and processed in PULSE_PREP_SHAPING. This will cause half a step
+          // to be missed, which will need recovering; this can be done through shaping_x.remainder.
+          shaping_x.forward = !TEST(current_block->direction_bits, X_AXIS);
+          if (!ShapingQueue::empty_x()) SET_BIT_TO(current_block->direction_bits, X_AXIS, TEST(last_direction_bits, X_AXIS));
+        }
       #endif
 
-      // plan the change of values for advance_dividend for the input shaping echoes
-      TERN_(INPUT_SHAPING, shaping_dividend_queue.enqueue(TERN0(HAS_SHAPING_X, echo_x), TERN0(HAS_SHAPING_Y, echo_y)));
-
-      // apply the adjustment to the primary signal
-      TERN_(HAS_SHAPING_X, advance_dividend.x -= echo_x);
-      TERN_(HAS_SHAPING_Y, advance_dividend.y -= echo_y);
+      // Y follows the same logic as X (but the comments aren't repeated)
+      #if ENABLED(INPUT_SHAPING_Y)
+        if (shaping_y.enabled) {
+          const int64_t steps = TEST(current_block->direction_bits, Y_AXIS) ? -int64_t(current_block->steps.y) : int64_t(current_block->steps.y);
+          shaping_y.last_block_end_pos += steps;
+          shaping_y.forward = !TEST(current_block->direction_bits, Y_AXIS);
+          if (!ShapingQueue::empty_y()) SET_BIT_TO(current_block->direction_bits, Y_AXIS, TEST(last_direction_bits, Y_AXIS));
+        }
+      #endif
 
       // No step events completed so far
       step_events_completed = 0;
 
       // Compute the acceleration and deceleration points
-      accelerate_until = current_block->accelerate_until << oversampling;
-      decelerate_after = current_block->decelerate_after << oversampling;
+      accelerate_until = current_block->accelerate_until << oversampling_factor;
+      decelerate_after = current_block->decelerate_after << oversampling_factor;
 
       TERN_(MIXING_EXTRUDER, mixer.stepper_setup(current_block->b_color));
 
@@ -2548,7 +2550,7 @@ uint32_t Stepper::block_phase_isr() {
         #endif
         if (current_block->la_advance_rate) {
           // apply LA scaling and discount the effect of frequency scaling
-          la_dividend = (advance_dividend.e << current_block->la_scaling) << oversampling;
+          la_dividend = (advance_dividend.e << current_block->la_scaling) << oversampling_factor;
         }
       #endif
 
@@ -2974,7 +2976,8 @@ void Stepper::init() {
   #endif
 }
 
-#if ENABLED(INPUT_SHAPING)
+#if HAS_SHAPING
+
   /**
    * Calculate a fixed point factor to apply to the signal and its echo
    * when shaping an axis.
@@ -2983,41 +2986,68 @@ void Stepper::init() {
     // from the damping ratio, get a factor that can be applied to advance_dividend for fixed point maths
     // for ZV, we use amplitudes 1/(1+K) and K/(1+K) where K = exp(-zeta * M_PI / sqrt(1.0f - zeta * zeta))
     // which can be converted to 1:7 fixed point with an excellent fit with a 3rd order polynomial
-    float shaping_factor;
-    if (zeta <= 0.0f) shaping_factor = 64.0f;
-    else if (zeta >= 1.0f) shaping_factor = 0.0f;
+    float factor2;
+    if (zeta <= 0.0f) factor2 = 64.0f;
+    else if (zeta >= 1.0f) factor2 = 0.0f;
     else {
-      shaping_factor = 64.44056192 + -99.02008832 * zeta;
+      factor2 = 64.44056192 + -99.02008832 * zeta;
       const float zeta2 = zeta * zeta;
-      shaping_factor += -7.58095488 * zeta2;
+      factor2 += -7.58095488 * zeta2;
       const float zeta3 = zeta2 * zeta;
-      shaping_factor += 43.073216 * zeta3;
+      factor2 += 43.073216 * zeta3;
+      factor2 = floor(factor2);
     }
 
     const bool was_on = hal.isr_state();
     hal.isr_off();
-    TERN_(HAS_SHAPING_X, if (axis == X_AXIS) { shaping_x.factor = floor(shaping_factor); shaping_x.zeta = zeta; })
-    TERN_(HAS_SHAPING_Y, if (axis == Y_AXIS) { shaping_y.factor = floor(shaping_factor); shaping_y.zeta = zeta; })
+    TERN_(INPUT_SHAPING_X, if (axis == X_AXIS) { shaping_x.factor2 = factor2; shaping_x.factor1 = 128 - factor2; shaping_x.zeta = zeta; })
+    TERN_(INPUT_SHAPING_Y, if (axis == Y_AXIS) { shaping_y.factor2 = factor2; shaping_y.factor1 = 128 - factor2; shaping_y.zeta = zeta; })
     if (was_on) hal.isr_on();
   }
 
   float Stepper::get_shaping_damping_ratio(const AxisEnum axis) {
-    TERN_(HAS_SHAPING_X, if (axis == X_AXIS) return shaping_x.zeta);
-    TERN_(HAS_SHAPING_Y, if (axis == Y_AXIS) return shaping_y.zeta);
+    TERN_(INPUT_SHAPING_X, if (axis == X_AXIS) return shaping_x.zeta);
+    TERN_(INPUT_SHAPING_Y, if (axis == Y_AXIS) return shaping_y.zeta);
     return -1;
   }
 
   void Stepper::set_shaping_frequency(const AxisEnum axis, const float freq) {
-    TERN_(HAS_SHAPING_X, if (axis == X_AXIS) { DelayTimeManager::set_delay(axis, float(uint32_t(STEPPER_TIMER_RATE) / 2) / freq); shaping_x.frequency = freq; })
-    TERN_(HAS_SHAPING_Y, if (axis == Y_AXIS) { DelayTimeManager::set_delay(axis, float(uint32_t(STEPPER_TIMER_RATE) / 2) / freq); shaping_y.frequency = freq; })
+    // enabling or disabling shaping whilst moving can result in lost steps
+    Planner::synchronize();
+
+    const bool was_on = hal.isr_state();
+    hal.isr_off();
+
+    const shaping_time_t delay = freq ? float(uint32_t(STEPPER_TIMER_RATE) / 2) / freq : shaping_time_t(-1);
+    #if ENABLED(INPUT_SHAPING_X)
+      if (axis == X_AXIS) {
+        ShapingQueue::set_delay(X_AXIS, delay);
+        shaping_x.frequency = freq;
+        shaping_x.enabled = !!freq;
+        shaping_x.delta_error = 0;
+        shaping_x.last_block_end_pos = count_position.x;
+      }
+    #endif
+    #if ENABLED(INPUT_SHAPING_Y)
+      if (axis == Y_AXIS) {
+        ShapingQueue::set_delay(Y_AXIS, delay);
+        shaping_y.frequency = freq;
+        shaping_y.enabled = !!freq;
+        shaping_y.delta_error = 0;
+        shaping_y.last_block_end_pos = count_position.y;
+      }
+    #endif
+
+    if (was_on) hal.isr_on();
   }
 
   float Stepper::get_shaping_frequency(const AxisEnum axis) {
-    TERN_(HAS_SHAPING_X, if (axis == X_AXIS) return shaping_x.frequency);
-    TERN_(HAS_SHAPING_Y, if (axis == Y_AXIS) return shaping_y.frequency);
+    TERN_(INPUT_SHAPING_X, if (axis == X_AXIS) return shaping_x.frequency);
+    TERN_(INPUT_SHAPING_Y, if (axis == Y_AXIS) return shaping_y.frequency);
     return -1;
   }
-#endif
+
+#endif // HAS_SHAPING
 
 /**
  * Set the stepper positions directly in steps
@@ -3029,6 +3059,13 @@ void Stepper::init() {
  * derive the current XYZE position later on.
  */
 void Stepper::_set_position(const abce_long_t &spos) {
+  #if ENABLED(INPUT_SHAPING_X)
+    const int32_t x_shaping_delta = count_position.x - shaping_x.last_block_end_pos;
+  #endif
+  #if ENABLED(INPUT_SHAPING_Y)
+    const int32_t y_shaping_delta = count_position.y - shaping_y.last_block_end_pos;
+  #endif
+
   #if ANY(IS_CORE, MARKFORGED_XY, MARKFORGED_YX)
     #if CORE_IS_XY
       // corexy positioning
@@ -3058,6 +3095,19 @@ void Stepper::_set_position(const abce_long_t &spos) {
     // default non-h-bot planning
     count_position = spos;
   #endif
+
+  #if ENABLED(INPUT_SHAPING_X)
+    if (shaping_x.enabled) {
+      count_position.x += x_shaping_delta;
+      shaping_x.last_block_end_pos = spos.x;
+    }
+  #endif
+  #if ENABLED(INPUT_SHAPING_Y)
+    if (shaping_y.enabled) {
+      count_position.y += y_shaping_delta;
+      shaping_y.last_block_end_pos = spos.y;
+    }
+  #endif
 }
 
 /**
@@ -3097,6 +3147,8 @@ void Stepper::set_axis_position(const AxisEnum a, const int32_t &v) {
   #endif
 
   count_position[a] = v;
+  TERN_(INPUT_SHAPING_X, if (a == X_AXIS) shaping_x.last_block_end_pos = v);
+  TERN_(INPUT_SHAPING_Y, if (a == Y_AXIS) shaping_y.last_block_end_pos = v);
 
   #ifdef __AVR__
     // Reenable Stepper ISR
diff --git a/Marlin/src/module/stepper.h b/Marlin/src/module/stepper.h
index 5b634c52e4..f29bb346d1 100644
--- a/Marlin/src/module/stepper.h
+++ b/Marlin/src/module/stepper.h
@@ -75,8 +75,8 @@
    */
   #define TIMER_READ_ADD_AND_STORE_CYCLES 34UL
 
-  // The base ISR takes 792 cycles
-  #define ISR_BASE_CYCLES  792UL
+  // Cycles taken by the base ISR
+  #define ISR_BASE_CYCLES 770UL
 
   // Linear advance base time is 64 cycles
   #if ENABLED(LIN_ADVANCE)
@@ -92,21 +92,25 @@
     #define ISR_S_CURVE_CYCLES 0UL
   #endif
 
+  // Input shaping base time
+  #if HAS_SHAPING
+    #define ISR_SHAPING_BASE_CYCLES 180UL
+  #else
+    #define ISR_SHAPING_BASE_CYCLES 0UL
+  #endif
+
   // Stepper Loop base cycles
   #define ISR_LOOP_BASE_CYCLES 4UL
 
-  // To start the step pulse, in the worst case takes
-  #define ISR_START_STEPPER_CYCLES 13UL
-
   // And each stepper (start + stop pulse) takes in worst case
-  #define ISR_STEPPER_CYCLES 16UL
+  #define ISR_STEPPER_CYCLES 100UL
 
 #else
   // Cycles to perform actions in START_TIMED_PULSE
   #define TIMER_READ_ADD_AND_STORE_CYCLES 13UL
 
-  // The base ISR takes 752 cycles
-  #define ISR_BASE_CYCLES  752UL
+  // Cycles taken by the base ISR
+  #define ISR_BASE_CYCLES  1000UL
 
   // Linear advance base time is 32 cycles
   #if ENABLED(LIN_ADVANCE)
@@ -122,12 +126,16 @@
     #define ISR_S_CURVE_CYCLES 0UL
   #endif
 
+  // Input shaping base time
+  #if HAS_SHAPING
+    #define ISR_SHAPING_BASE_CYCLES 290UL
+  #else
+    #define ISR_SHAPING_BASE_CYCLES 0UL
+  #endif
+
   // Stepper Loop base cycles
   #define ISR_LOOP_BASE_CYCLES 32UL
 
-  // To start the step pulse, in the worst case takes
-  #define ISR_START_STEPPER_CYCLES 57UL
-
   // And each stepper (start + stop pulse) takes in worst case
   #define ISR_STEPPER_CYCLES 88UL
 
@@ -202,8 +210,12 @@
   #error "Expected at least one of MINIMUM_STEPPER_PULSE or MAXIMUM_STEPPER_RATE to be defined"
 #endif
 
-// But the user could be enforcing a minimum time, so the loop time is
-#define ISR_LOOP_CYCLES (ISR_LOOP_BASE_CYCLES + _MAX(MIN_STEPPER_PULSE_CYCLES, MIN_ISR_LOOP_CYCLES))
+// The loop takes the base time plus the time for all the Bresenham logic for R pulses plus the time
+// between pulses for (R-1) pulses. But the user could be enforcing a minimum time so the loop time is:
+#define ISR_LOOP_CYCLES(R) ((ISR_LOOP_BASE_CYCLES + MIN_ISR_LOOP_CYCLES + MIN_STEPPER_PULSE_CYCLES) * (R - 1) + _MAX(MIN_ISR_LOOP_CYCLES, MIN_STEPPER_PULSE_CYCLES))
+
+// Model input shaping as an extra loop call
+#define ISR_SHAPING_LOOP_CYCLES(R) ((TERN0(HAS_SHAPING, ISR_LOOP_BASE_CYCLES) + TERN0(INPUT_SHAPING_X, ISR_X_STEPPER_CYCLES) + TERN0(INPUT_SHAPING_Y, ISR_Y_STEPPER_CYCLES)) * (R) + (MIN_ISR_LOOP_CYCLES) * (R - 1))
 
 // If linear advance is enabled, then it is handled separately
 #if ENABLED(LIN_ADVANCE)
@@ -228,7 +240,7 @@
 #endif
 
 // Now estimate the total ISR execution time in cycles given a step per ISR multiplier
-#define ISR_EXECUTION_CYCLES(R) (((ISR_BASE_CYCLES + ISR_S_CURVE_CYCLES + (ISR_LOOP_CYCLES) * (R) + ISR_LA_BASE_CYCLES + ISR_LA_LOOP_CYCLES)) / (R))
+#define ISR_EXECUTION_CYCLES(R) (((ISR_BASE_CYCLES + ISR_S_CURVE_CYCLES + ISR_SHAPING_BASE_CYCLES + ISR_LOOP_CYCLES(R) + ISR_SHAPING_LOOP_CYCLES(R) + ISR_LA_BASE_CYCLES + ISR_LA_LOOP_CYCLES)) / (R))
 
 // The maximum allowable stepping frequency when doing x128-x1 stepping (in Hz)
 #define MAX_STEP_ISR_FREQUENCY_128X ((F_CPU) / ISR_EXECUTION_CYCLES(128))
@@ -312,116 +324,142 @@ constexpr ena_mask_t enable_overlap[] = {
 
 //static_assert(!any_enable_overlap(), "There is some overlap.");
 
-#if ENABLED(INPUT_SHAPING)
-
-  typedef IF<ENABLED(__AVR__), uint16_t, uint32_t>::type shaping_time_t;
+#if HAS_SHAPING
 
   // These constexpr are used to calculate the shaping queue buffer sizes
   constexpr xyze_float_t max_feedrate = DEFAULT_MAX_FEEDRATE;
   constexpr xyze_float_t steps_per_unit = DEFAULT_AXIS_STEPS_PER_UNIT;
-  constexpr float max_steprate = _MAX(LOGICAL_AXIS_LIST(
-                                      max_feedrate.e * steps_per_unit.e,
-                                      max_feedrate.x * steps_per_unit.x,
-                                      max_feedrate.y * steps_per_unit.y,
-                                      max_feedrate.z * steps_per_unit.z,
-                                      max_feedrate.i * steps_per_unit.i,
-                                      max_feedrate.j * steps_per_unit.j,
-                                      max_feedrate.k * steps_per_unit.k,
-                                      max_feedrate.u * steps_per_unit.u,
-                                      max_feedrate.v * steps_per_unit.v,
-                                      max_feedrate.w * steps_per_unit.w
-                                    ));
-  constexpr uint16_t shaping_dividends = max_steprate / _MIN(0x7FFFFFFFL OPTARG(HAS_SHAPING_X, SHAPING_FREQ_X) OPTARG(HAS_SHAPING_Y, SHAPING_FREQ_Y)) / 2 + 3;
-  constexpr uint16_t shaping_segments = max_steprate / (MIN_STEPS_PER_SEGMENT) / _MIN(0x7FFFFFFFL OPTARG(HAS_SHAPING_X, SHAPING_FREQ_X) OPTARG(HAS_SHAPING_Y, SHAPING_FREQ_Y)) / 2 + 3;
+  // MIN_STEP_ISR_FREQUENCY is known at compile time on AVRs and any reduction in SRAM is welcome
+  #ifdef __AVR__
+    constexpr float max_isr_rate = _MAX(
+                                      LOGICAL_AXIS_LIST(
+                                        max_feedrate.e * steps_per_unit.e,
+                                        max_feedrate.x * steps_per_unit.x,
+                                        max_feedrate.y * steps_per_unit.y,
+                                        max_feedrate.z * steps_per_unit.z,
+                                        max_feedrate.i * steps_per_unit.i,
+                                        max_feedrate.j * steps_per_unit.j,
+                                        max_feedrate.k * steps_per_unit.k,
+                                        max_feedrate.u * steps_per_unit.u,
+                                        max_feedrate.v * steps_per_unit.v,
+                                        max_feedrate.w * steps_per_unit.w
+                                      )
+                                      OPTARG(ADAPTIVE_STEP_SMOOTHING, MIN_STEP_ISR_FREQUENCY)
+                                    );
+    constexpr float max_step_rate = _MIN(max_isr_rate,
+                                      TERN0(INPUT_SHAPING_X, max_feedrate.x * steps_per_unit.x) +
+                                      TERN0(INPUT_SHAPING_Y, max_feedrate.y * steps_per_unit.y)
+                                    );
+  #else
+    constexpr float max_step_rate = TERN0(INPUT_SHAPING_X, max_feedrate.x * steps_per_unit.x) +
+                                    TERN0(INPUT_SHAPING_Y, max_feedrate.y * steps_per_unit.y);
+  #endif
+  constexpr uint16_t shaping_echoes = max_step_rate / _MIN(0x7FFFFFFFL OPTARG(INPUT_SHAPING_X, SHAPING_FREQ_X) OPTARG(INPUT_SHAPING_Y, SHAPING_FREQ_Y)) / 2 + 3;
 
-  class DelayTimeManager {
+  typedef IF<ENABLED(__AVR__), uint16_t, uint32_t>::type shaping_time_t;
+  enum shaping_echo_t { ECHO_NONE = 0, ECHO_FWD = 1, ECHO_BWD = 2 };
+  struct shaping_echo_axis_t {
+    #if ENABLED(INPUT_SHAPING_X)
+      shaping_echo_t x:2;
+    #endif
+    #if ENABLED(INPUT_SHAPING_Y)
+      shaping_echo_t y:2;
+    #endif
+  };
+
+  class ShapingQueue {
     private:
-      static shaping_time_t now;
-      #ifdef HAS_SHAPING_X
-        static shaping_time_t delay_x;
+      static shaping_time_t       now;
+      static shaping_time_t       times[shaping_echoes];
+      static shaping_echo_axis_t  echo_axes[shaping_echoes];
+      static uint16_t             tail;
+
+      #if ENABLED(INPUT_SHAPING_X)
+        static shaping_time_t delay_x;    // = shaping_time_t(-1) to disable queueing
+        static shaping_time_t peek_x_val;
+        static uint16_t head_x;
+        static uint16_t _free_count_x;
       #endif
-      #ifdef HAS_SHAPING_Y
-        static shaping_time_t delay_y;
+      #if ENABLED(INPUT_SHAPING_Y)
+        static shaping_time_t delay_y;    // = shaping_time_t(-1) to disable queueing
+        static shaping_time_t peek_y_val;
+        static uint16_t head_y;
+        static uint16_t _free_count_y;
       #endif
+
     public:
-      static void decrement_delays(const shaping_time_t interval) { now += interval; }
+      static void decrement_delays(const shaping_time_t interval) {
+        now += interval;
+        TERN_(INPUT_SHAPING_X, if (peek_x_val != shaping_time_t(-1)) peek_x_val -= interval);
+        TERN_(INPUT_SHAPING_Y, if (peek_y_val != shaping_time_t(-1)) peek_y_val -= interval);
+      }
       static void set_delay(const AxisEnum axis, const shaping_time_t delay) {
-        TERN_(HAS_SHAPING_X, if (axis == X_AXIS) delay_x = delay);
-        TERN_(HAS_SHAPING_Y, if (axis == Y_AXIS) delay_y = delay);
+        TERN_(INPUT_SHAPING_X, if (axis == X_AXIS) delay_x = delay);
+        TERN_(INPUT_SHAPING_Y, if (axis == Y_AXIS) delay_y = delay);
       }
-  };
-
-  template<int SIZE>
-  class DelayQueue : public DelayTimeManager {
-    protected:
-      shaping_time_t times[SIZE];
-      uint16_t tail = 0 OPTARG(HAS_SHAPING_X, head_x = 0) OPTARG(HAS_SHAPING_Y, head_y = 0);
-
-    public:
-      void enqueue() {
+      static void enqueue(const bool x_step, const bool x_forward, const bool y_step, const bool y_forward) {
+        TERN_(INPUT_SHAPING_X, if (head_x == tail && x_step) peek_x_val = delay_x);
+        TERN_(INPUT_SHAPING_Y, if (head_y == tail && y_step) peek_y_val = delay_y);
         times[tail] = now;
-        if (++tail == SIZE) tail = 0;
+        TERN_(INPUT_SHAPING_X, echo_axes[tail].x = x_step ? (x_forward ? ECHO_FWD : ECHO_BWD) : ECHO_NONE);
+        TERN_(INPUT_SHAPING_Y, echo_axes[tail].y = y_step ? (y_forward ? ECHO_FWD : ECHO_BWD) : ECHO_NONE);
+        if (++tail == shaping_echoes) tail = 0;
+        TERN_(INPUT_SHAPING_X, _free_count_x--);
+        TERN_(INPUT_SHAPING_Y, _free_count_y--);
+        TERN_(INPUT_SHAPING_X, if (echo_axes[head_x].x == ECHO_NONE) dequeue_x());
+        TERN_(INPUT_SHAPING_Y, if (echo_axes[head_y].y == ECHO_NONE) dequeue_y());
       }
-      #ifdef HAS_SHAPING_X
-        shaping_time_t peek_x() {
-          if (head_x != tail) return times[head_x] + delay_x - now;
-          else return shaping_time_t(-1);
+      #if ENABLED(INPUT_SHAPING_X)
+        static shaping_time_t peek_x() { return peek_x_val; }
+        static bool dequeue_x() {
+          bool forward = echo_axes[head_x].x == ECHO_FWD;
+          do {
+            _free_count_x++;
+            if (++head_x == shaping_echoes) head_x = 0;
+          } while (head_x != tail && echo_axes[head_x].x == ECHO_NONE);
+          peek_x_val = head_x == tail ? shaping_time_t(-1) : times[head_x] + delay_x - now;
+          return forward;
         }
-        void dequeue_x() { if (++head_x == SIZE) head_x = 0; }
-        bool empty_x() { return head_x == tail; }
-        uint16_t free_count_x() { return head_x > tail ? head_x - tail - 1 : head_x + SIZE - tail - 1; }
+        static bool empty_x() { return head_x == tail; }
+        static uint16_t free_count_x() { return _free_count_x; }
       #endif
-      #ifdef HAS_SHAPING_Y
-        shaping_time_t peek_y() {
-          if (head_y != tail) return times[head_y] + delay_y - now;
-          else return shaping_time_t(-1);
+      #if ENABLED(INPUT_SHAPING_Y)
+        static shaping_time_t peek_y() { return peek_y_val; }
+        static bool dequeue_y() {
+          bool forward = echo_axes[head_y].y == ECHO_FWD;
+          do {
+            _free_count_y++;
+            if (++head_y == shaping_echoes) head_y = 0;
+          } while (head_y != tail && echo_axes[head_y].y == ECHO_NONE);
+          peek_y_val = head_y == tail ? shaping_time_t(-1) : times[head_y] + delay_y - now;
+          return forward;
         }
-        void dequeue_y() { if (++head_y == SIZE) head_y = 0; }
-        bool empty_y() { return head_y == tail; }
-        uint16_t free_count_y() { return head_y > tail ? head_y - tail - 1 : head_y + SIZE - tail - 1; }
+        static bool empty_y() { return head_y == tail; }
+        static uint16_t free_count_y() { return _free_count_y; }
       #endif
-      void purge() { auto temp = TERN_(HAS_SHAPING_X, head_x) = TERN_(HAS_SHAPING_Y, head_y) = tail; UNUSED(temp);}
-  };
-
-  class ParamDelayQueue : public DelayQueue<shaping_segments> {
-    private:
-      #ifdef HAS_SHAPING_X
-        int32_t params_x[shaping_segments];
-      #endif
-      #ifdef HAS_SHAPING_Y
-        int32_t params_y[shaping_segments];
-      #endif
-
-    public:
-      void enqueue(const int32_t param_x, const int32_t param_y) {
-        TERN(HAS_SHAPING_X, params_x[DelayQueue<shaping_segments>::tail] = param_x, UNUSED(param_x));
-        TERN(HAS_SHAPING_Y, params_y[DelayQueue<shaping_segments>::tail] = param_y, UNUSED(param_y));
-        DelayQueue<shaping_segments>::enqueue();
+      static void purge() {
+        const auto st = shaping_time_t(-1);
+        #if ENABLED(INPUT_SHAPING_X)
+          head_x = tail; _free_count_x = shaping_echoes - 1; peek_x_val = st;
+        #endif
+        #if ENABLED(INPUT_SHAPING_Y)
+          head_y = tail; _free_count_y = shaping_echoes - 1; peek_y_val = st;
+        #endif
       }
-      #ifdef HAS_SHAPING_X
-        const int32_t dequeue_x() {
-          const int32_t result = params_x[DelayQueue<shaping_segments>::head_x];
-          DelayQueue<shaping_segments>::dequeue_x();
-          return result;
-        }
-      #endif
-      #ifdef HAS_SHAPING_Y
-        const int32_t dequeue_y() {
-          const int32_t result = params_y[DelayQueue<shaping_segments>::head_y];
-          DelayQueue<shaping_segments>::dequeue_y();
-          return result;
-        }
-      #endif
   };
 
   struct ShapeParams {
     float frequency;
     float zeta;
-    uint8_t factor;
-    int32_t dividend;
+    bool enabled;
+    int16_t delta_error = 0;    // delta_error for secondary Bresenham mod 128
+    uint8_t factor1;
+    uint8_t factor2;
+    bool forward;
+    int32_t last_block_end_pos = 0;
   };
 
-#endif // INPUT_SHAPING
+#endif // HAS_SHAPING
 
 //
 // Stepper class definition
@@ -527,13 +565,11 @@ class Stepper {
       static bool bezier_2nd_half; // If Bézier curve has been initialized or not
     #endif
 
-    #if ENABLED(INPUT_SHAPING)
-      static ParamDelayQueue shaping_dividend_queue;
-      static DelayQueue<shaping_dividends> shaping_queue;
-      #if HAS_SHAPING_X
+    #if HAS_SHAPING
+      #if ENABLED(INPUT_SHAPING_X)
         static ShapeParams shaping_x;
       #endif
-      #if HAS_SHAPING_Y
+      #if ENABLED(INPUT_SHAPING_Y)
         static ShapeParams shaping_y;
       #endif
     #endif
@@ -597,7 +633,7 @@ class Stepper {
     // The stepper block processing ISR phase
     static uint32_t block_phase_isr();
 
-    #if ENABLED(INPUT_SHAPING)
+    #if HAS_SHAPING
       static void shaping_isr();
     #endif
 
@@ -620,6 +656,20 @@ class Stepper {
     // Check if the given block is busy or not - Must not be called from ISR contexts
     static bool is_block_busy(const block_t * const block);
 
+    #if HAS_SHAPING
+      // Check whether the stepper is processing any input shaping echoes
+      static bool input_shaping_busy() {
+        const bool was_on = hal.isr_state();
+        hal.isr_off();
+
+        const bool result = TERN0(INPUT_SHAPING_X, !ShapingQueue::empty_x()) || TERN0(INPUT_SHAPING_Y, !ShapingQueue::empty_y());
+
+        if (was_on) hal.isr_on();
+
+        return result;
+      }
+    #endif
+
     // Get the position of a stepper, in steps
     static int32_t position(const AxisEnum axis);
 
@@ -754,7 +804,7 @@ class Stepper {
       set_directions();
     }
 
-    #if ENABLED(INPUT_SHAPING)
+    #if HAS_SHAPING
       static void set_shaping_damping_ratio(const AxisEnum axis, const float zeta);
       static float get_shaping_damping_ratio(const AxisEnum axis);
       static void set_shaping_frequency(const AxisEnum axis, const float freq);
diff --git a/buildroot/tests/mega2560 b/buildroot/tests/mega2560
index 18a6ea88c9..4167c3e4bf 100755
--- a/buildroot/tests/mega2560
+++ b/buildroot/tests/mega2560
@@ -80,9 +80,9 @@ opt_set MOTHERBOARD BOARD_AZTEEG_X3_PRO MIXING_STEPPERS 5 LCD_LANGUAGE ru \
         FIL_RUNOUT2_PIN 16 FIL_RUNOUT3_PIN 17 FIL_RUNOUT4_PIN 4 FIL_RUNOUT5_PIN 5
 opt_enable MIXING_EXTRUDER GRADIENT_MIX GRADIENT_VTOOL CR10_STOCKDISPLAY \
            USE_CONTROLLER_FAN CONTROLLER_FAN_EDITABLE CONTROLLER_FAN_IGNORE_Z \
-           FILAMENT_RUNOUT_SENSOR ADVANCED_PAUSE_FEATURE NOZZLE_PARK_FEATURE INPUT_SHAPING
+           FILAMENT_RUNOUT_SENSOR ADVANCED_PAUSE_FEATURE NOZZLE_PARK_FEATURE INPUT_SHAPING_X INPUT_SHAPING_Y
 opt_disable DISABLE_INACTIVE_EXTRUDER
-exec_test $1 $2 "Azteeg X3 | Mixing Extruder (x5) | Gradient Mix | Greek" "$3"
+exec_test $1 $2 "Azteeg X3 | Mixing Extruder (x5) | Gradient Mix | Input Shaping | Greek" "$3"
 
 #
 # Test SPEAKER with BOARD_BQ_ZUM_MEGA_3D and BQ_LCD_SMART_CONTROLLER
diff --git a/ini/features.ini b/ini/features.ini
index 7c8fd2fd8f..e376e2757e 100644
--- a/ini/features.ini
+++ b/ini/features.ini
@@ -187,7 +187,7 @@ HAS_DUPLICATION_MODE                   = src_filter=+<src/gcode/control/M605.cpp
 LIN_ADVANCE                            = src_filter=+<src/gcode/feature/advance>
 PHOTO_GCODE                            = src_filter=+<src/gcode/feature/camera>
 CONTROLLER_FAN_EDITABLE                = src_filter=+<src/gcode/feature/controllerfan>
-INPUT_SHAPING                          = src_filter=+<src/gcode/feature/input_shaping>
+HAS_SHAPING                            = src_filter=+<src/gcode/feature/input_shaping>
 GCODE_MACROS                           = src_filter=+<src/gcode/feature/macro>
 GRADIENT_MIX                           = src_filter=+<src/gcode/feature/mixing/M166.cpp>
 HAS_SAVED_POSITIONS                    = src_filter=+<src/gcode/feature/pause/G60.cpp> +<src/gcode/feature/pause/G61.cpp>