Merge branch 'MK3_3.12_Optimizations' of https://github.com/leptun/Prusa-Firmware into MK3_3.12_Optimizations

This commit is contained in:
Guðni Már Gilbert 2022-02-01 18:37:57 +00:00
commit 8681d84c44

View file

@ -8,30 +8,36 @@ extern const uint16_t speed_lookuptable_slow[256][2] PROGMEM;
#ifndef _NO_ASM #ifndef _NO_ASM
// NOTE(review): this span is a side-by-side diff rendering, not compilable C.
// Each line below holds the REMOVED text first (the old MultiU16X8toH16
// statement macro) immediately followed by the ADDED text (the new
// MUL8x16R8 inline function). Read the two columns separately.
//
// New code (right column): MUL8x16R8(x, y)
//   Multiplies an 8-bit x by a 16-bit y and returns the 16-bit value
//   ((x * y) >> 8), rounded using the bit that is shifted out.
//   Steps, as annotated in the asm itself:
//     1. mul %B2,%A1 ; movw %0,r0  -> out = x * high byte of y
//     2. mul %A2,%A1               -> r1:r0 = x * low byte of y
//     3. lsl r0                    -> MSB of the discarded low byte goes to carry
//     4. adc %A0,r1                -> add the high partial byte plus the rounding carry
//     5. clr r1 ; adc %B0,r1       -> restore the zero register, then propagate the carry
//   The result is written to an early-clobber output ("=&r"); r0 and r1 are
//   listed as clobbers.
//
// Old code (left column): MultiU16X8toH16(intRes, charIn1, intIn2)
//   Statement macro that stores the result into intRes, keeps a zero in r26,
//   and constrains both inputs to "d" registers.
//   NOTE(review): the old sequence uses `lsr r0` where the new one uses
//   `lsl r0`; presumably the old form fed the wrong bit into the rounding
//   carry and this commit corrects it -- confirm against the AVR ISA manual.
// intRes = intIn1 * intIn2 >> 8 // return ((x * y) >> 8) with rounding when shifting right
// uses: FORCE_INLINE uint16_t MUL8x16R8(uint8_t x, uint16_t y) {
// r26 to store 0 uint16_t out;
// r27 to store the byte 1 of the 24 bit result __asm__ (
#define MultiU16X8toH16(intRes, charIn1, intIn2) \ // %0 out
asm volatile ( \ // %1 x
"clr r26 \n\t" \ // %2 y
"mul %A1, %B2 \n\t" \ // uint8_t: %An or %n
"movw %A0, r0 \n\t" \ // uint16_t: %Bn %An
"mul %A1, %A2 \n\t" \ // __uint24: %Cn %Bn %An
"add %A0, r1 \n\t" \ // uint32_t: %Dn %Cn %Bn %An
"adc %B0, r26 \n\t" \ //
"lsr r0 \n\t" \ //
"adc %A0, r26 \n\t" \ // B2 A2 *
"adc %B0, r26 \n\t" \ // A1
"clr r1 \n\t" \ //---------
: \ // B0 A0 RR
"=&r" (intRes) \ "mul %B2, %A1" "\n\t"
: \ "movw %0, r0" "\n\t"
"d" (charIn1), \ "mul %A2, %A1" "\n\t"
"d" (intIn2) \ "lsl r0" "\n\t" //push MSB to carry for rounding
: \ "adc %A0, r1" "\n\t" //add with carry (for rounding)
"r26" \ "clr r1" "\n\t" //make r1 __zero_reg__ again
) "adc %B0, r1" "\n\t" //propagate carry of addition (add 0 with carry)
: "=&r" (out)
: "r" (x), "r" (y)
: "r0", "r1" //clobbers: Technically these are either scratch registers or always 0 registers, but I'm making sure the compiler knows just in case.
);
return out;
}
// intRes = longIn1 * longIn2 >> 24 // intRes = longIn1 * longIn2 >> 24
// uses: // uses:
@ -115,8 +121,7 @@ FORCE_INLINE unsigned short calc_timer(uint16_t step_rate, uint8_t& step_loops)
unsigned short table_address = (unsigned short)&speed_lookuptable_fast[(unsigned char)(step_rate>>8)][0]; unsigned short table_address = (unsigned short)&speed_lookuptable_fast[(unsigned char)(step_rate>>8)][0];
unsigned char tmp_step_rate = (step_rate & 0x00ff); unsigned char tmp_step_rate = (step_rate & 0x00ff);
uint16_t gain = (uint16_t)pgm_read_word_near(table_address+2); uint16_t gain = (uint16_t)pgm_read_word_near(table_address+2);
MultiU16X8toH16(timer, tmp_step_rate, gain); timer = (unsigned short)pgm_read_word_near(table_address) - MUL8x16R8(tmp_step_rate, gain);
timer = (unsigned short)pgm_read_word_near(table_address) - timer;
} }
else { // lower step rates else { // lower step rates
unsigned short table_address = (unsigned short)&speed_lookuptable_slow[0][0]; unsigned short table_address = (unsigned short)&speed_lookuptable_slow[0][0];