diff --git a/Marlin/stepper.cpp b/Marlin/stepper.cpp index f7ce788de0..62324dec1a 100644 --- a/Marlin/stepper.cpp +++ b/Marlin/stepper.cpp @@ -100,6 +100,13 @@ bool Stepper::abort_current_block; bool Stepper::locked_z_motor = false, Stepper::locked_z2_motor = false; #endif +/** + * Marlin uses the Bresenham algorithm. For a detailed explanation of theory and + * method see https://www.cs.helsinki.fi/group/goa/mallinnus/lines/bresenh.html + * + * The implementation used here additionally rounds up the starting seed. + */ + int32_t Stepper::counter_X = 0, Stepper::counter_Y = 0, Stepper::counter_Z = 0, @@ -369,15 +376,15 @@ void Stepper::set_directions() { #if ENABLED(S_CURVE_ACCELERATION) /** - * We are using a quintic (fifth-degree) Bézier polynomial for the velocity curve. - * This gives us a "linear pop" velocity curve; with pop being the sixth derivative of position: + * This uses a quintic (fifth-degree) Bézier polynomial for the velocity curve, giving + * a "linear pop" velocity curve; with pop being the sixth derivative of position: * velocity - 1st, acceleration - 2nd, jerk - 3rd, snap - 4th, crackle - 5th, pop - 6th * * The Bézier curve takes the form: * * V(t) = P_0 * B_0(t) + P_1 * B_1(t) + P_2 * B_2(t) + P_3 * B_3(t) + P_4 * B_4(t) + P_5 * B_5(t) * - * Where 0 <= t <= 1, and V(t) is the velocity. P_0 through P_5 are the control points, and B_0(t) + * Where 0 <= t <= 1, and V(t) is the velocity. P_0 through P_5 are the control points, and B_0(t) * through B_5(t) are the Bernstein basis as follows: * * B_0(t) = (1-t)^5 = -t^5 + 5t^4 - 10t^3 + 10t^2 - 5t + 1 @@ -390,7 +397,7 @@ void Stepper::set_directions() { * | | | | | | * A B C D E F * - * Unfortunately, we cannot use forward-differencing to calculate each position through + * Unfortunately, we cannot use forward-differencing to calculate each position through * the curve, as Marlin uses variable timer periods. So, we require a formula of the form: * * V_f(t) = A*t^5 + B*t^4 + C*t^3 + D*t^2 + E*t + F @@ -405,7 +412,7 @@ void Stepper::set_directions() { * E = - 5*P_0 + 5*P_1 * F = P_0 * - * Now, since we will (currently) *always* want the initial acceleration and jerk values to be 0, + * Now, since we will (currently) *always* want the initial acceleration and jerk values to be 0, * We set P_i = P_0 = P_1 = P_2 (initial velocity), and P_t = P_3 = P_4 = P_5 (target velocity), * which, after simplification, resolves to: * @@ -416,12 +423,12 @@ void Stepper::set_directions() { * E = 0 * F = P_i * - * As the t is evaluated in non uniform steps here, there is no other way rather than evaluating + * As the t is evaluated in non uniform steps here, there is no other way rather than evaluating * the Bézier curve at each point: * * V_f(t) = A*t^5 + B*t^4 + C*t^3 + F [0 <= t <= 1] * - * Floating point arithmetic execution time cost is prohibitive, so we will transform the math to + * Floating point arithmetic execution time cost is prohibitive, so we will transform the math to * use fixed point values to be able to evaluate it in realtime. Assuming a maximum of 250000 steps * per second (driver pulses should at least be 2µS hi/2µS lo), and allocating 2 bits to avoid * overflows on the evaluation of the Bézier curve, means we can use @@ -432,7 +439,7 @@ void Stepper::set_directions() { * C: signed Q24.7 , |range = +/- 250000 *10 * 128 = +/- 320000000 = 0x1312D000 | 29 bits + sign * F: signed Q24.7 , |range = +/- 250000 * 128 = 32000000 = 0x01E84800 | 25 bits + sign * - * The trapezoid generator state contains the following information, that we will use to create and evaluate + * The trapezoid generator state contains the following information, that we will use to create and evaluate * the Bézier curve: * * blk->step_event_count [TS] = The total count of steps for this movement. (=distance) @@ -444,7 +451,7 @@ void Stepper::set_directions() { * * For Any 32bit CPU: * - * At the start of each trapezoid, we calculate the coefficients A,B,C,F and Advance [AV], as follows: + * At the start of each trapezoid, calculate the coefficients A,B,C,F and Advance [AV], as follows: * * A = 6*128*(VF - VI) = 768*(VF - VI) * B = 15*128*(VI - VF) = 1920*(VI - VF) @@ -452,7 +459,7 @@ void Stepper::set_directions() { * F = 128*VI = 128*VI * AV = (1<<32)/TS ~= 0xFFFFFFFF / TS (To use ARM UDIV, that is 32 bits) (this is computed at the planner, to offload expensive calculations from the ISR) * - * And for each point, we will evaluate the curve with the following sequence: + * And for each point, evaluate the curve with the following sequence: * * void lsrs(uint32_t& d, uint32_t s, int cnt) { * d = s >> cnt; @@ -505,10 +512,10 @@ void Stepper::set_directions() { * return alo; * } * - * This will be rewritten in ARM assembly to get peak performance and will take 43 cycles to execute + * This is rewritten in ARM assembly for optimal performance (43 cycles to execute). * - * For AVR, we scale precision of coefficients to make it possible to evaluate the Bézier curve in - * realtime: Let's reduce precision as much as possible. After some experimentation we found that: + * For AVR, the precision of coefficients is scaled so the Bézier curve can be evaluated in real-time: + * Let's reduce precision as much as possible. After some experimentation we found that: * * Assume t and AV with 24 bits is enough * A = 6*(VF - VI) @@ -517,9 +524,9 @@ void Stepper::set_directions() { * F = VI * AV = (1<<24)/TS (this is computed at the planner, to offload expensive calculations from the ISR) * - * Instead of storing sign for each coefficient, we will store its absolute value, + * Instead of storing sign for each coefficient, we will store its absolute value, * and flag the sign of the A coefficient, so we can save to store the sign bit. - * It always holds that sign(A) = - sign(B) = sign(C) + * It always holds that sign(A) = - sign(B) = sign(C) * * So, the resulting range of the coefficients are: * @@ -529,7 +536,7 @@ void Stepper::set_directions() { * C: signed Q24 , range = 250000 *10 = 2500000 = 0x1312D0 | 21 bits * F: signed Q24 , range = 250000 = 250000 = 0x0ED090 | 20 bits * - * And for each curve, we estimate its coefficients with: + * And for each curve, estimate its coefficients with: * * void _calc_bezier_curve_coeffs(int32_t v0, int32_t v1, uint32_t av) { * // Calculate the Bézier coefficients @@ -548,7 +555,7 @@ void Stepper::set_directions() { * bezier_F = v0; * } * - * And for each point, we will evaluate the curve with the following sequence: + * And for each point, evaluate the curve with the following sequence: * * // unsigned multiplication of 24 bits x 24bits, return upper 16 bits * void umul24x24to16hi(uint16_t& r, uint24_t op1, uint24_t op2) { @@ -598,9 +605,8 @@ void Stepper::set_directions() { * } * return acc; * } - * Those functions will be translated into assembler to get peak performance. coefficient calculations takes 70 cycles, - * Bezier point evaluation takes 150 cycles - * + * These functions are translated to assembler for optimal performance. + * Coefficient calculation takes 70 cycles. Bezier point evaluation takes 150 cycles. */ // For AVR we use assembly to maximize speed @@ -1138,7 +1144,7 @@ hal_timer_t Stepper::isr_scheduler() { // Limit the amount of iterations uint8_t max_loops = 10; - + // We need this variable here to be able to use it in the following loop hal_timer_t min_ticks; do { @@ -1258,12 +1264,12 @@ void Stepper::stepper_pulse_phase_isr() { // Advance the Bresenham counter; start a pulse if the axis needs a step #define PULSE_START(AXIS) do{ \ _COUNTER(AXIS) += current_block->steps[_AXIS(AXIS)]; \ - if (_COUNTER(AXIS) > 0) { _APPLY_STEP(AXIS)(!_INVERT_STEP_PIN(AXIS), 0); } \ + if (_COUNTER(AXIS) >= 0) { _APPLY_STEP(AXIS)(!_INVERT_STEP_PIN(AXIS), 0); } \ }while(0) // Advance the Bresenham counter; start a pulse if the axis needs a step #define STEP_TICK(AXIS) do { \ - if (_COUNTER(AXIS) > 0) { \ + if (_COUNTER(AXIS) >= 0) { \ _COUNTER(AXIS) -= current_block->step_event_count; \ count_position[_AXIS(AXIS)] += count_direction[_AXIS(AXIS)]; \ } \ @@ -1351,7 +1357,7 @@ void Stepper::stepper_pulse_phase_isr() { #if ENABLED(LIN_ADVANCE) counter_E += current_block->steps[E_AXIS]; - if (counter_E > 0) { + if (counter_E >= 0) { #if DISABLED(MIXING_EXTRUDER) // Don't step E here for mixing extruder motor_direction(E_AXIS) ? --e_steps : ++e_steps; @@ -1363,7 +1369,7 @@ void Stepper::stepper_pulse_phase_isr() { const bool dir = motor_direction(E_AXIS); MIXING_STEPPERS_LOOP(j) { counter_m[j] += current_block->steps[E_AXIS]; - if (counter_m[j] > 0) { + if (counter_m[j] >= 0) { counter_m[j] -= current_block->mix_event_count[j]; dir ? --e_steps[j] : ++e_steps[j]; } @@ -1380,7 +1386,7 @@ void Stepper::stepper_pulse_phase_isr() { // Step mixing steppers (proportionally) counter_m[j] += current_block->steps[E_AXIS]; // Step when the counter goes over zero - if (counter_m[j] > 0) En_STEP_WRITE(j, !INVERT_E_STEP_PIN); + if (counter_m[j] >= 0) En_STEP_WRITE(j, !INVERT_E_STEP_PIN); } #else // !MIXING_EXTRUDER PULSE_START(E); @@ -1420,7 +1426,7 @@ void Stepper::stepper_pulse_phase_isr() { #if DISABLED(LIN_ADVANCE) #if ENABLED(MIXING_EXTRUDER) MIXING_STEPPERS_LOOP(j) { - if (counter_m[j] > 0) { + if (counter_m[j] >= 0) { counter_m[j] -= current_block->mix_event_count[j]; En_STEP_WRITE(j, INVERT_E_STEP_PIN); } @@ -1702,11 +1708,11 @@ uint32_t Stepper::stepper_block_phase_isr() { bezier_2nd_half = false; #endif - // Initialize Bresenham counters to 1/2 the ceiling - counter_X = counter_Y = counter_Z = counter_E = -((int32_t)(current_block->step_event_count >> 1)); + // Initialize Bresenham counters to 1/2 the ceiling, with proper roundup (as explained in the article linked above) + counter_X = counter_Y = counter_Z = counter_E = -int32_t((current_block->step_event_count >> 1) + (current_block->step_event_count & 1)); #if ENABLED(MIXING_EXTRUDER) MIXING_STEPPERS_LOOP(i) - counter_m[i] = -(current_block->mix_event_count[i] >> 1); + counter_m[i] = -int32_t((current_block->mix_event_count[i] >> 1) + (current_block->mix_event_count[i] & 1)); #endif #if ENABLED(Z_LATE_ENABLE)