diff --git a/Marlin/Marlin_main.cpp b/Marlin/Marlin_main.cpp
index e9b57c69dc..8db285174c 100644
--- a/Marlin/Marlin_main.cpp
+++ b/Marlin/Marlin_main.cpp
@@ -11524,15 +11524,13 @@ void prepare_move_to_destination() {
       #ifdef TCCR0A
         case TIMER0A:
         case TIMER0B:
-          // TCCR0B &= ~(_BV(CS00) | _BV(CS01) | _BV(CS02));
-          // TCCR0B |= val;
+          //SET_CS(0, val);
           break;
       #endif
       #ifdef TCCR1A
         case TIMER1A:
         case TIMER1B:
-          // TCCR1B &= ~(_BV(CS10) | _BV(CS11) | _BV(CS12));
-          // TCCR1B |= val;
+          //SET_CS(1, val);
           break;
       #endif
       #ifdef TCCR2
@@ -11545,32 +11543,28 @@ void prepare_move_to_destination() {
       #ifdef TCCR2A
         case TIMER2A:
         case TIMER2B:
-          TCCR2B &= ~(_BV(CS20) | _BV(CS21) | _BV(CS22));
-          TCCR2B |= val;
+          SET_CS(2, val);
           break;
       #endif
       #ifdef TCCR3A
         case TIMER3A:
         case TIMER3B:
         case TIMER3C:
-          TCCR3B &= ~(_BV(CS30) | _BV(CS31) | _BV(CS32));
-          TCCR3B |= val;
+          SET_CS(3, val);
           break;
       #endif
       #ifdef TCCR4A
         case TIMER4A:
         case TIMER4B:
         case TIMER4C:
-          TCCR4B &= ~(_BV(CS40) | _BV(CS41) | _BV(CS42));
-          TCCR4B |= val;
+          SET_CS(4, val);
           break;
       #endif
       #ifdef TCCR5A
         case TIMER5A:
         case TIMER5B:
         case TIMER5C:
-          TCCR5B &= ~(_BV(CS50) | _BV(CS51) | _BV(CS52));
-          TCCR5B |= val;
+          SET_CS(5, val);
           break;
       #endif
     }
diff --git a/Marlin/fastio.h b/Marlin/fastio.h
index 6cc2e85a72..0e73a39483 100644
--- a/Marlin/fastio.h
+++ b/Marlin/fastio.h
@@ -21,99 +21,135 @@
  */
 
 /**
-  This code contributed by Triffid_Hunter and modified by Kliment
-  why double up on these macros? see http://gcc.gnu.org/onlinedocs/cpp/Stringification.html
-*/
+ * Contributed by Triffid_Hunter, modified by Kliment, extended by the Marlin team
+ * Why double up on these macros? see http://gcc.gnu.org/onlinedocs/cpp/Stringification.html
+ */
 
 #ifndef _FASTIO_ARDUINO_H
 #define _FASTIO_ARDUINO_H
 
 #include <avr/io.h>
 
-/**
-  utility functions
-*/
-
-#ifndef MASK
-  #define MASK(PIN)  (1 << PIN)
+#ifndef _BV
+  #define _BV(PIN) (1 << (PIN)) // Fallback bit mask; argument parenthesized so expression arguments evaluate correctly
+#endif
 
 /**
-  magic I/O routines
-  now you can simply SET_OUTPUT(STEP); WRITE(STEP, 1); WRITE(STEP, 0);
-*/
+ * Magic I/O routines
+ *
+ * Now you can simply SET_OUTPUT(PIN); WRITE(PIN, HIGH); WRITE(PIN, LOW);
+ */
 
-/// Read a pin
-#define _READ(IO) ((bool)(DIO ## IO ## _RPORT & MASK(DIO ## IO ## _PIN)))
-/// write to a pin
-// On some boards pins > 0x100 are used. These are not converted to atomic actions. An critical section is needed.
+#define _READ(IO) ((bool)(DIO ## IO ## _RPORT & _BV(DIO ## IO ## _PIN))) // true if the pin's bit is set in its read port
 
-#define _WRITE_NC(IO, v)  do { if (v) {DIO ##  IO ## _WPORT |= MASK(DIO ## IO ## _PIN); } else {DIO ##  IO ## _WPORT &= ~MASK(DIO ## IO ## _PIN); }; } while (0)
+// Ports at addresses >= 0x100 are not converted to atomic actions, so a critical section is needed (see _WRITE).
+
+#define _WRITE_NC(IO, v)  do { if (v) {DIO ##  IO ## _WPORT |= _BV(DIO ## IO ## _PIN); } else {DIO ##  IO ## _WPORT &= ~_BV(DIO ## IO ## _PIN); }; } while (0) // Set or clear the pin's bit with no critical section
 
 #define _WRITE_C(IO, v)   do { if (v) { \
                                          CRITICAL_SECTION_START; \
-                                         {DIO ##  IO ## _WPORT |= MASK(DIO ## IO ## _PIN); } \
+                                         {DIO ##  IO ## _WPORT |= _BV(DIO ## IO ## _PIN); } \
                                          CRITICAL_SECTION_END; \
                                        } \
                                        else { \
                                          CRITICAL_SECTION_START; \
-                                         {DIO ##  IO ## _WPORT &= ~MASK(DIO ## IO ## _PIN); } \
+                                         {DIO ##  IO ## _WPORT &= ~_BV(DIO ## IO ## _PIN); } \
                                          CRITICAL_SECTION_END; \
                                        } \
                                      } \
                                      while (0)
 
-#define _WRITE(IO, v)  do {  if (&(DIO ##  IO ## _RPORT) >= (uint8_t *)0x100) {_WRITE_C(IO, v); } else {_WRITE_NC(IO, v); }; } while (0)
+#define _WRITE(IO, v) do { if (&(DIO ## IO ## _RPORT) >= (uint8_t *)0x100) {_WRITE_C(IO, v); } else {_WRITE_NC(IO, v); }; } while (0) // Critical-section write for ports at address 0x100 and up, plain write otherwise
 
-/// toggle a pin
-#define _TOGGLE(IO)  do {DIO ##  IO ## _RPORT ^= MASK(DIO ## IO ## _PIN); } while (0)
+#define _TOGGLE(IO) do {DIO ## IO ## _RPORT = _BV(DIO ## IO ## _PIN); } while (0) // Write only the pin's mask: a 1 written to a PINx bit toggles that pin, so ^= would also toggle every other pin reading high. NOTE(review): assumes DIOn_RPORT is the PINx register - confirm
 
-/// set pin as input
-#define _SET_INPUT(IO) do {DIO ##  IO ## _DDR &= ~MASK(DIO ## IO ## _PIN); } while (0)
-/// set pin as output
-#define _SET_OUTPUT(IO) do {DIO ##  IO ## _DDR |=  MASK(DIO ## IO ## _PIN); } while (0)
+#define _SET_INPUT(IO) do {DIO ## IO ## _DDR &= ~_BV(DIO ## IO ## _PIN); } while (0) // Clear the pin's bit in its DDR register
+#define _SET_OUTPUT(IO) do {DIO ## IO ## _DDR |= _BV(DIO ## IO ## _PIN); } while (0) // Set the pin's bit in its DDR register
 
-/// check if pin is an input
-#define _GET_INPUT(IO)  ((DIO ## IO ## _DDR & MASK(DIO ## IO ## _PIN)) == 0)
-/// check if pin is an output
-#define _GET_OUTPUT(IO)  ((DIO ## IO ## _DDR & MASK(DIO ## IO ## _PIN)) != 0)
+#define _GET_INPUT(IO) ((DIO ## IO ## _DDR & _BV(DIO ## IO ## _PIN)) == 0) // true when the pin's DDR bit is clear
+#define _GET_OUTPUT(IO) ((DIO ## IO ## _DDR & _BV(DIO ## IO ## _PIN)) != 0) // true when the pin's DDR bit is set
+#define _GET_TIMER(IO) (DIO ## IO ## _PWM) // Expands to the pin's DIOn_PWM entry from the pin tables
 
-/// check if pin is an timer
-#define _GET_TIMER(IO)  (DIO ## IO ## _PWM)
+#define READ(IO) _READ(IO) // Wrapper: IO is macro-expanded before _READ pastes it into DIOn_* names
+#define WRITE(IO,V) _WRITE(IO,V) // Wrapper: IO is macro-expanded before _WRITE pastes it
+#define TOGGLE(IO) _TOGGLE(IO) // Wrapper: IO is macro-expanded before _TOGGLE pastes it
 
-//  why double up on these macros? see http://gcc.gnu.org/onlinedocs/cpp/Stringification.html
-
-/// Read a pin wrapper
-#define READ(IO)  _READ(IO)
-/// Write to a pin wrapper
-#define WRITE(IO, v)  _WRITE(IO, v)
-
-/// toggle a pin wrapper
-#define TOGGLE(IO)  _TOGGLE(IO)
-
-/// set pin as input wrapper
-#define SET_INPUT(IO)  _SET_INPUT(IO)
-/// set pin as input with pullup wrapper
+#define SET_INPUT(IO) _SET_INPUT(IO) // Wrapper: IO is macro-expanded before _SET_INPUT pastes it
 #define SET_INPUT_PULLUP(IO) do{ _SET_INPUT(IO); _WRITE(IO, HIGH); }while(0)
-/// set pin as output wrapper
-#define SET_OUTPUT(IO)  _SET_OUTPUT(IO)
+#define SET_OUTPUT(IO) _SET_OUTPUT(IO) // Wrapper: IO is macro-expanded before _SET_OUTPUT pastes it
 
-/// check if pin is an input wrapper
-#define GET_INPUT(IO)  _GET_INPUT(IO)
-/// check if pin is an output wrapper
-#define GET_OUTPUT(IO)  _GET_OUTPUT(IO)
+#define GET_INPUT(IO) _GET_INPUT(IO) // Wrapper: IO is macro-expanded before _GET_INPUT pastes it
+#define GET_OUTPUT(IO) _GET_OUTPUT(IO) // Wrapper: IO is macro-expanded before _GET_OUTPUT pastes it
+#define GET_TIMER(IO) _GET_TIMER(IO) // Wrapper: IO is macro-expanded before _GET_TIMER pastes it
 
-/// check if pin is an timer wrapper
-#define GET_TIMER(IO)  _GET_TIMER(IO)
-
-// Shorthand
-#define OUT_WRITE(IO, v) { SET_OUTPUT(IO); WRITE(IO, v); }
+#define OUT_WRITE(IO, v) do{ SET_OUTPUT(IO); WRITE(IO, v); }while(0) // Shorthand: make the pin an output, then write v to it
 
 /**
-  ports and functions
+ * Timer Control (waveform generation mode, compare output mode, clock source)
+ */
 
-  added as necessary or if I feel like it- not a comprehensive list!
-*/
+// Waveform Generation Modes (enumerator value = the 4-bit WGM field written by SET_WGM)
+typedef enum {
+  WGM_NORMAL          =  0,
+  WGM_PWM_PC_8        =  1,
+  WGM_PWM_PC_9        =  2,
+  WGM_PWM_PC_10       =  3,
+  WGM_CTC_OCRnA       =  4, // COM OCnx
+  WGM_FAST_PWM_8      =  5,
+  WGM_FAST_PWM_9      =  6,
+  WGM_FAST_PWM_10     =  7,
+  WGM_PWM_PC_FC_ICRn  =  8,
+  WGM_PWM_PC_FC_OCRnA =  9, // COM OCnA
+  WGM_PWM_PC_ICRn     = 10,
+  WGM_PWM_PC_OCRnA    = 11, // COM OCnA
+  WGM_CTC_ICRn        = 12, // COM OCnx
+  WGM_reserved        = 13,
+  WGM_FAST_PWM_ICRn   = 14, // COM OCnA
+  WGM_FAST_PWM_OCRnA  = 15  // COM OCnA
+} WaveGenMode;
+
+// Compare Output Modes (enumerator value = the 2-bit COM field written by SET_COM)
+typedef enum {
+  COM_NORMAL    = 0,
+  COM_TOGGLE    = 1, // Non-PWM: OCnx ... Both PWM (WGM 9,11,14,15): OCnA only ... else NORMAL
+  COM_CLEAR_SET = 2, // Non-PWM: OCnx ... Fast PWM: OCnx/Bottom ... PC-FC: OCnx Up/Down
+  COM_SET_CLEAR = 3  // Non-PWM: OCnx ... Fast PWM: OCnx/Bottom ... PC-FC: OCnx Up/Down
+} CompareMode;
+
+// Clock Sources (enumerator value = the 3-bit CS field written by SET_CS)
+typedef enum {
+  CS_NONE           = 0,
+  CS_PRESCALER_1    = 1,
+  CS_PRESCALER_8    = 2,
+  CS_PRESCALER_64   = 3,
+  CS_PRESCALER_256  = 4,
+  CS_PRESCALER_1024 = 5,
+  CS_EXT_FALLING    = 6,
+  CS_EXT_RISING     = 7
+} ClockSource;
+
+#define SET_WGM(T,V) do{ /* Apply WaveGenMode V to timer T, one read-modify-write per register */ \
+    TCCR##T##A = (TCCR##T##A & ~(0x3 << WGM##T##0)) | (( int(V)       & 0x3) << WGM##T##0); /* V bits 0-1 -> TCCRnA at WGMn0 */ \
+    TCCR##T##B = (TCCR##T##B & ~(0x3 << WGM##T##2)) | (((int(V) >> 2) & 0x3) << WGM##T##2); /* V bits 2-3 -> TCCRnB at WGMn2 */ \
+  }while(0)
+
+#define SET_CS(T,V) do{ /* Replace the 3-bit clock-select field of TCCRnB, using timer T's own CSn0 bit name */ \
+    TCCR##T##B = (TCCR##T##B & ~(0x7 << CS##T##0)) | ((int(V) & 0x7) << CS##T##0); \
+  }while(0)
+
+#define SET_COM(T,Q,V) do{ /* COMnQ1:0 all live in TCCRnA; clear the field with ~ (not !) so other bits survive */ \
+    TCCR##T##A = (TCCR##T##A & ~(0x3 << COM##T##Q##0)) | ((int(V) & 0x3) << COM##T##Q##0); \
+  }while(0)
+#define SET_COMA(T,V) SET_COM(T,A,V) // SET_COM on timer T with the A selector
+#define SET_COMB(T,V) SET_COM(T,B,V) // SET_COM on timer T with the B selector
+#define SET_COMS(T,V1,V2) do{ SET_COMA(T,V1); SET_COMB(T,V2); }while(0) // A gets V1, then B gets V2
+
+#define SET_ICNC(T,V) (TCCR##T##B = ((TCCR##T##B & ~_BV(7)) | (((V) & 1) << 7))) // Bit 7 of TCCRnB <- low bit of V
+#define SET_ICES(T,V) (TCCR##T##B = ((TCCR##T##B & ~_BV(6)) | (((V) & 1) << 6))) // Bit 6 of TCCRnB <- low bit of V
+
+/**
+ * Ports and Functions
+ */
 
 #if defined(__AVR_ATmega168__) || defined(__AVR_ATmega328__) || defined(__AVR_ATmega328P__)
   // UART
@@ -446,7 +482,7 @@
   #define PD7_WPORT   PORTD
   #define PD7_DDR     DDRD
   #define PD7_PWM     NULL
-#endif  /*  _AVR_ATmega{168,328,328P}__ */
+#endif // __AVR_ATmega(168|328|328P)__
 
 #if defined(__AVR_ATmega644__) || defined(__AVR_ATmega644P__) || defined(__AVR_ATmega644PA__) || defined(__AVR_ATmega1284P__)
   // UART
@@ -949,7 +985,7 @@
   #define PD7_WPORT   PORTD
   #define PD7_DDR     DDRD
   #define PD7_PWM     OCR2A
-#endif  /*  _AVR_ATmega{644,644P,644PA}__ */
+#endif // __AVR_ATmega(644|644P|644PA|1284P)__
 
 #if defined(__AVR_ATmega1280__) || defined(__AVR_ATmega2560__)
   // UART
@@ -2031,8 +2067,7 @@
   #define PL7_WPORT   PORTL
   #define PL7_DDR     DDRL
   #define PL7_PWM     NULL
-
-#endif
+#endif // __AVR_ATmega(1280|2560)__
 
 #if defined(__AVR_AT90USB1287__) || defined(__AVR_AT90USB1286__) || defined(__AVR_AT90USB646__) || defined(__AVR_AT90USB647__)
 
@@ -2040,8 +2075,8 @@
   #define DEBUG_LED   DIO31 /* led D5 red */
 
   /**
-  pins
-  */
+   * pins
+   */
 
   //#define AT90USBxx_TEENSYPP_ASSIGNMENTS // Use Teensy++ 2.0 assignments
   #ifndef AT90USBxx_TEENSYPP_ASSIGNMENTS // Use traditional Marlin pin assignments
@@ -3335,8 +3370,7 @@
     #define PF7_DDR     DDRF
 
   #endif // AT90USBxx_TEENSYPP_ASSIGNMENTS Teensyduino assignments
-#endif // __AVR_AT90usbxxx__
-
+#endif // __AVR_AT90USB(1287|1286|646|647)__
 
 #if defined(__AVR_ATmega1281__) || defined(__AVR_ATmega2561__)
   // UART
@@ -4027,12 +4061,10 @@
   #define PG5_WPORT   PORTG
   #define PG5_DDR     DDRG
   #define PG5_PWM     &OCR0B
-
-
-#endif
+#endif // __AVR_ATmega(1281|2561)__
 
 #ifndef DIO0_PIN
   #error "pins for this chip not defined in arduino.h! If you write an appropriate pin definition and have this firmware work on your chip, please submit a pull request"
 #endif
 
-#endif /* _FASTIO_ARDUINO_H */
+#endif // _FASTIO_ARDUINO_H
diff --git a/Marlin/stepper.cpp b/Marlin/stepper.cpp
index 0ae6ee7a6f..ed36a3124a 100644
--- a/Marlin/stepper.cpp
+++ b/Marlin/stepper.cpp
@@ -1081,21 +1081,17 @@ void Stepper::init() {
   #endif
 
   // waveform generation = 0100 = CTC
-  CBI(TCCR1B, WGM13);
-  SBI(TCCR1B, WGM12);
-  CBI(TCCR1A, WGM11);
-  CBI(TCCR1A, WGM10);
+  SET_WGM(1, WGM_CTC_OCRnA);
 
   // output mode = 00 (disconnected)
-  TCCR1A &= ~(3 << COM1A0);
-  TCCR1A &= ~(3 << COM1B0);
+  SET_COMS(1, COM_NORMAL, COM_NORMAL);
 
   // Set the timer pre-scaler
   // Generally we use a divider of 8, resulting in a 2MHz timer
   // frequency on a 16MHz MCU. If you are going to change this, be
   // sure to regenerate speed_lookuptable.h with
   // create_speed_lookuptable.py
-  TCCR1B = (TCCR1B & ~(0x07 << CS10)) | (2 << CS10);
+  SET_CS(1, CS_PRESCALER_8);  //  CS 2 = 1/8 prescaler
 
   // Init Stepper ISR to 122 Hz for quick starting
   OCR1A = 0x4000;