From d275fe0e83e7d29397413e4804069dc0ef139670 Mon Sep 17 00:00:00 2001 From: "D.R.racer" Date: Wed, 27 Jan 2021 09:33:28 +0100 Subject: [PATCH] Extract gcode filter from SdBaseFile into SdFile + optimization - Start saving instructions as the whole PR was >1KB long. - It turned out the compiler was unable to understand the core skipping cycle and an ASM version had to be used. - Add seekSet aware of the G-code filter --- Firmware/SdBaseFile.cpp | 118 +--------------------------- Firmware/SdBaseFile.h | 29 +------ Firmware/SdFile.cpp | 170 ++++++++++++++++++++++++++++++++++++++++ Firmware/SdFile.h | 16 +++- Firmware/SdVolume.h | 2 +- Firmware/cardreader.h | 4 +- Firmware/ultralcd.cpp | 4 +- 7 files changed, 193 insertions(+), 150 deletions(-) diff --git a/Firmware/SdBaseFile.cpp b/Firmware/SdBaseFile.cpp index 4dead898..4b19ceae 100644 --- a/Firmware/SdBaseFile.cpp +++ b/Firmware/SdBaseFile.cpp @@ -534,17 +534,6 @@ bool SdBaseFile::open(const char* path, uint8_t oflag) { return open(cwd_, path, oflag); } -bool SdBaseFile::openFilteredGcode(SdBaseFile* dirFile, const char* path){ - if( open(dirFile, path, O_READ) ){ - gf.reset(0,0); - // compute the block to start with - if( ! computeNextFileBlock(&gf.block, &gf.offset) ) - return false; - return true; - } else { - return false; - } -} //------------------------------------------------------------------------------ /** Open a file or directory by name. * @@ -1043,111 +1032,6 @@ int16_t SdBaseFile::read() { return read(&b, 1) == 1 ? b : -1; } -int16_t SdBaseFile::readFilteredGcode() { - // avoid calling the default heavy-weight read() for just one byte - return gf.read_byte(); -} - -void GCodeInputFilter::reset(uint32_t blk, uint16_t ofs){ - // @@TODO clean up - block = blk; - offset = ofs; - cachePBegin = sd->vol_->cache()->data; - // reset cache read ptr to its begin - cacheP = cachePBegin; -} - -int16_t GCodeInputFilter::read_byte(){ - EnsureBlock(); // this is unfortunate :( ... other calls are using the cache and we can loose the data block of our gcode file - - // assume, we have the 512B block cache filled and terminated with a '\n' -// SERIAL_PROTOCOLPGM("read_byte enter:"); -// for(uint8_t i = 0; i < 16; ++i){ -// SERIAL_PROTOCOL( cacheP[i] ); -// } - - const uint8_t *start = cacheP; - uint8_t consecutiveCommentLines = 0; - while( *cacheP == ';' ){ - for(;;){ - while( *(++cacheP) != '\n' ); // skip until a newline is found - // found a newline, prepare the next block if block cache end reached - if( cacheP - cachePBegin >= 512 ){ - // at the end of block cache, fill new data in - sd->curPosition_ += cacheP - start; - if( ! sd->computeNextFileBlock(&block, &offset) )goto fail; - EnsureBlock(); // fetch it into RAM - cacheP = start = cachePBegin; - } else { - if(++consecutiveCommentLines == 255){ - // SERIAL_PROTOCOLLN(sd->curPosition_); - goto forceExit; - } - // peek the next byte - we are inside the block at least at 511th index - still safe - if( *(cacheP+1) == ';' ){ - // consecutive comment - ++cacheP; - ++consecutiveCommentLines; - } - break; // found the real end of the line even across many blocks - } - } - } -forceExit: - sd->curPosition_ += cacheP - start + 1; - { - int16_t rv = *cacheP++; - - // prepare next block if needed - if( cacheP - cachePBegin >= 512 ){ -// SERIAL_PROTOCOLLN(sd->curPosition_); - if( ! sd->computeNextFileBlock(&block, &offset) )goto fail; - // don't need to force fetch the block here, it will get loaded on the next call - cacheP = cachePBegin; - } - return rv; - } -fail: -// SERIAL_PROTOCOLLNPGM("CacheFAIL"); - return -1; -} - -bool GCodeInputFilter::EnsureBlock(){ - if ( sd->vol_->cacheRawBlock(block, SdVolume::CACHE_FOR_READ)){ - // terminate with a '\n' - const uint16_t terminateOfs = (sd->fileSize_ - offset) < 512 ? (sd->fileSize_ - offset) : 512; - sd->vol_->cache()->data[ terminateOfs ] = '\n'; - return true; - } else { - return false; - } -} - -bool SdBaseFile::computeNextFileBlock(uint32_t *block, uint16_t *offset) { - // error if not open or write only - if (!isOpen() || !(flags_ & O_READ)) return false; - - *offset = curPosition_ & 0X1FF; // offset in block - if (type_ == FAT_FILE_TYPE_ROOT_FIXED) { - *block = vol_->rootDirStart() + (curPosition_ >> 9); - } else { - uint8_t blockOfCluster = vol_->blockOfCluster(curPosition_); - if (*offset == 0 && blockOfCluster == 0) { - // start of new cluster - if (curPosition_ == 0) { - // use first cluster in file - curCluster_ = firstCluster_; - } else { - // get next cluster from FAT - if (!vol_->fatGet(curCluster_, &curCluster_)) return false; - } - } - *block = vol_->clusterStartBlock(curCluster_) + blockOfCluster; - } - return true; -} - - //------------------------------------------------------------------------------ /** Read data from a file starting at the current position. * @@ -1561,7 +1445,7 @@ bool SdBaseFile::rmRfStar() { * \param[in] oflag Values for \a oflag are constructed by a bitwise-inclusive * OR of open flags. see SdBaseFile::open(SdBaseFile*, const char*, uint8_t). */ -SdBaseFile::SdBaseFile(const char* path, uint8_t oflag):gf(this) { +SdBaseFile::SdBaseFile(const char* path, uint8_t oflag) { type_ = FAT_FILE_TYPE_CLOSED; writeError = false; open(path, oflag); diff --git a/Firmware/SdBaseFile.h b/Firmware/SdBaseFile.h index 4b339587..ac39338d 100644 --- a/Firmware/SdBaseFile.h +++ b/Firmware/SdBaseFile.h @@ -176,32 +176,15 @@ uint16_t const FAT_DEFAULT_DATE = ((2000 - 1980) << 9) | (1 << 5) | 1; uint16_t const FAT_DEFAULT_TIME = (1 << 11); -class SdBaseFile; -class GCodeInputFilter { - SdBaseFile *sd; //@@TODO subject to removal - merge with some derived SdFileXX - const uint8_t *cachePBegin; - const uint8_t *cacheP; - bool EnsureBlock(); -public: - uint32_t block; // remember the current file block to be kept in cache - due to reuse of the memory, the block may fall out a must be read back - uint16_t offset; - -public: - inline GCodeInputFilter(SdBaseFile *sd):sd(sd){} - void reset(uint32_t blk, uint16_t ofs); - int16_t read_byte(); -}; - //------------------------------------------------------------------------------ /** * \class SdBaseFile * \brief Base class for SdFile with Print and C++ streams. */ class SdBaseFile { - GCodeInputFilter gf; public: /** Create an instance. */ - SdBaseFile() : gf(this), writeError(false), type_(FAT_FILE_TYPE_CLOSED) {} + SdBaseFile() : writeError(false), type_(FAT_FILE_TYPE_CLOSED) {} SdBaseFile(const char* path, uint8_t oflag); ~SdBaseFile() {if(isOpen()) close();} /** @@ -294,18 +277,13 @@ class SdBaseFile { bool open(SdBaseFile* dirFile, uint16_t index, uint8_t oflag); bool open(SdBaseFile* dirFile, const char* path, uint8_t oflag); bool open(const char* path, uint8_t oflag = O_READ); - bool openFilteredGcode(SdBaseFile* dirFile, const char* path); bool openNext(SdBaseFile* dirFile, uint8_t oflag); bool openRoot(SdVolume* vol); int peek(); static void printFatDate(uint16_t fatDate); static void printFatTime( uint16_t fatTime); bool printName(); - - int16_t readFilteredGcode(); - bool computeNextFileBlock(uint32_t *block, uint16_t *offset); - -private: +protected: int16_t read(); int16_t read(void* buf, uint16_t nbyte); public: @@ -347,8 +325,7 @@ public: SdVolume* volume() const {return vol_;} int16_t write(const void* buf, uint16_t nbyte); //------------------------------------------------------------------------------ - private: - friend class GCodeInputFilter; + protected: // allow SdFat to set cwd_ friend class SdFat; // global pointer to cwd dir diff --git a/Firmware/SdFile.cpp b/Firmware/SdFile.cpp index 2fb4d594..4787a093 100644 --- a/Firmware/SdFile.cpp +++ b/Firmware/SdFile.cpp @@ -30,6 +30,176 @@ */ SdFile::SdFile(const char* path, uint8_t oflag) : SdBaseFile(path, oflag) { } + +//size=100B +bool SdFile::openFilteredGcode(SdBaseFile* dirFile, const char* path){ + if( open(dirFile, path, O_READ) ){ + gfReset(0,0); + // compute the block to start with + if( ! gfComputeNextFileBlock() ) + return false; + return true; + } else { + return false; + } +} + +//size=90B +bool SdFile::seekSetFilteredGcode(uint32_t pos){ + bool rv = seekSet(pos); + gfComputeNextFileBlock(); + return rv; +} + +//size=50B +void SdFile::gfReset(uint32_t blk, uint16_t ofs){ + // @@TODO clean up + gfBlock = blk; + gfOffset = ofs; + gfCachePBegin = vol_->cache()->data; + // reset cache read ptr to its begin + gfCacheP = gfCachePBegin; +} + +//FORCE_INLINE const uint8_t * find_endl(const uint8_t *p){ +// while( *(++p) != '\n' ); // skip until a newline is found +// return p; +//} + +// think twice before allowing this to inline - manipulating 4B longs is costly +// moreover - this function has its parameters in registers only, so no heavy stack usage besides the call/ret +void __attribute__((noinline)) SdFile::gfUpdateCurrentPosition(uint16_t inc){ + curPosition_ += inc; +} + +#define find_endl(resultP, startP) \ +__asm__ __volatile__ ( \ +"cycle: \n" \ +"ld r22, Z+ \n" \ +"cpi r22, 0x0A \n" \ +"brne cycle \n" \ +: "=z" (resultP) /* result of the ASM code - in our case the Z register (R30:R31) */ \ +: "z" (startP) /* input of the ASM code - in our case the Z register as well (R30:R31) */ \ +: "r22" /* modifying register R22 - so that the compiler knows */ \ +) + +//size=400B +// avoid calling the default heavy-weight read() for just one byte +int16_t SdFile::readFilteredGcode(){ + gfEnsureBlock(); // this is unfortunate :( ... other calls are using the cache and we can loose the data block of our gcode file + + // assume, we have the 512B block cache filled and terminated with a '\n' +// SERIAL_PROTOCOLPGM("read_byte enter:"); +// for(uint8_t i = 0; i < 16; ++i){ +// SERIAL_PROTOCOL( cacheP[i] ); +// } + + const uint8_t *start = gfCacheP; + uint8_t consecutiveCommentLines = 0; + while( *gfCacheP == ';' ){ + for(;;){ + + //while( *(++gfCacheP) != '\n' ); // skip until a newline is found - suboptimal code! + // Wondering, why this "nice while cycle" is done in such a weird way using a separate find_endl() function? + // Have a look at the ASM code GCC produced! + + // At first - a separate find_endl() makes the compiler understand, + // that I don't need to store gfCacheP every time, I'm only interested in the final address where the '\n' was found + // - the cycle can run on CPU registers only without touching memory besides reading the character being compared. + // Not only makes the code run considerably faster, but is also 40B shorter! + // This was the generated code: + //FORCE_INLINE const uint8_t * find_endl(const uint8_t *p){ + // while( *(++p) != '\n' ); // skip until a newline is found + // return p; } + // 11c5e: movw r30, r18 + // 11c60: subi r18, 0xFF ; 255 + // 11c62: sbci r19, 0xFF ; 255 + // 11c64: ld r22, Z + // 11c66: cpi r22, 0x0A ; 10 + // 11c68: brne .-12 ; 0x11c5e + + // Still, even that was suboptimal as the compiler seems not to understand the usage of ld r22, Z+ (the plus is important) + // aka automatic increment of the Z register (R30:R31 pair) + // There is no other way than pure ASM! + find_endl(gfCacheP, gfCacheP); + + // found a newline, prepare the next block if block cache end reached + if( gfCacheP - gfCachePBegin >= 512 ){ + // at the end of block cache, fill new data in + gfUpdateCurrentPosition( gfCacheP - start ); + if( ! gfComputeNextFileBlock() )goto fail; + gfEnsureBlock(); // fetch it into RAM + gfCacheP = start = gfCachePBegin; + } else { + if(++consecutiveCommentLines == 255){ + // SERIAL_PROTOCOLLN(sd->curPosition_); + goto forceExit; + } + // peek the next byte - we are inside the block at least at 511th index - still safe + if( *(gfCacheP+1) == ';' ){ + // consecutive comment + ++gfCacheP; + ++consecutiveCommentLines; + } + break; // found the real end of the line even across many blocks + } + } + } +forceExit: + { + gfUpdateCurrentPosition( gfCacheP - start + 1 ); + int16_t rv = *gfCacheP++; + + // prepare next block if needed + if( gfCacheP - gfCachePBegin >= 512 ){ + if( ! gfComputeNextFileBlock() )goto fail; + // don't need to force fetch the block here, it will get loaded on the next call + gfCacheP = gfCachePBegin; + } + return rv; + } +fail: +// SERIAL_PROTOCOLLNPGM("CacheFAIL"); + return -1; +} + +//size=100B +bool SdFile::gfEnsureBlock(){ + if ( vol_->cacheRawBlock(gfBlock, SdVolume::CACHE_FOR_READ)){ + // terminate with a '\n' + const uint16_t terminateOfs = (fileSize_ - gfOffset) < 512 ? (fileSize_ - gfOffset) : 512U; + vol_->cache()->data[ terminateOfs ] = '\n'; + return true; + } else { + return false; + } +} + +//size=350B +bool SdFile::gfComputeNextFileBlock() { + // error if not open or write only + if (!isOpen() || !(flags_ & O_READ)) return false; + + gfOffset = curPosition_ & 0X1FF; // offset in block + if (type_ == FAT_FILE_TYPE_ROOT_FIXED) { + gfBlock = vol_->rootDirStart() + (curPosition_ >> 9); + } else { + uint8_t blockOfCluster = vol_->blockOfCluster(curPosition_); + if (gfOffset == 0 && blockOfCluster == 0) { + // start of new cluster + if (curPosition_ == 0) { + // use first cluster in file + curCluster_ = firstCluster_; + } else { + // get next cluster from FAT + if (!vol_->fatGet(curCluster_, &curCluster_)) return false; + } + } + gfBlock = vol_->clusterStartBlock(curCluster_) + blockOfCluster; + } + return true; +} + //------------------------------------------------------------------------------ /** Write data to an open file. * diff --git a/Firmware/SdFile.h b/Firmware/SdFile.h index 60e2f5de..b4c91a7e 100644 --- a/Firmware/SdFile.h +++ b/Firmware/SdFile.h @@ -34,7 +34,16 @@ * \brief SdBaseFile with Print. */ class SdFile : public SdBaseFile/*, public Print*/ { - public: + // GCode filtering vars and methods - due to optimization reasons not wrapped in a separate class + const uint8_t *gfCachePBegin; + const uint8_t *gfCacheP; + uint32_t gfBlock; // remember the current file block to be kept in cache - due to reuse of the memory, the block may fall out a must be read back + uint16_t gfOffset; + void gfReset(uint32_t blk, uint16_t ofs); + bool gfEnsureBlock(); + bool gfComputeNextFileBlock(); + void gfUpdateCurrentPosition(uint16_t inc); +public: SdFile() {} SdFile(const char* name, uint8_t oflag); #if ARDUINO >= 100 @@ -43,6 +52,9 @@ class SdFile : public SdBaseFile/*, public Print*/ { void write(uint8_t b); #endif + bool openFilteredGcode(SdBaseFile* dirFile, const char* path); + int16_t readFilteredGcode(); + bool seekSetFilteredGcode(uint32_t pos); int16_t write(const void* buf, uint16_t nbyte); void write(const char* str); void write_P(PGM_P str); @@ -51,4 +63,4 @@ class SdFile : public SdBaseFile/*, public Print*/ { #endif // SdFile_h -#endif \ No newline at end of file +#endif diff --git a/Firmware/SdVolume.h b/Firmware/SdVolume.h index 7c4fce95..17699190 100644 --- a/Firmware/SdVolume.h +++ b/Firmware/SdVolume.h @@ -119,7 +119,7 @@ class SdVolume { bool dbgFat(uint32_t n, uint32_t* v) {return fatGet(n, v);} //------------------------------------------------------------------------------ private: - friend class GCodeInputFilter; + friend class SdFile; // Allow SdBaseFile access to SdVolume private data. friend class SdBaseFile; diff --git a/Firmware/cardreader.h b/Firmware/cardreader.h index 1c630b66..8dfb68c9 100644 --- a/Firmware/cardreader.h +++ b/Firmware/cardreader.h @@ -1,6 +1,8 @@ #ifndef CARDREADER_H #define CARDREADER_H +#define SDSUPPORT + #ifdef SDSUPPORT #define MAX_DIR_DEPTH 10 @@ -64,7 +66,7 @@ public: //@@TODO potential performance problem - when the comment reading fails, sdpos points to the last correctly read character. // However, repeated reading (e.g. after power panic) the comment will be read again - it should survive correctly, it will just take a few moments to skip FORCE_INLINE int16_t getFilteredGcodeChar() { sdpos = file.curPosition();return (int16_t)file.readFilteredGcode();}; - FORCE_INLINE void setIndex(long index) {sdpos = index;file.seekSet(index);}; + /*FORCE_INLINE*/ void setIndex(long index) {sdpos = index;file.seekSetFilteredGcode(index);}; FORCE_INLINE uint8_t percentDone(){if(!isFileOpen()) return 0; if(filesize) return sdpos/((filesize+99)/100); else return 0;}; FORCE_INLINE char* getWorkDirName(){workDir.getFilename(filename);return filename;}; FORCE_INLINE uint32_t get_sdpos() { if (!isFileOpen()) return 0; else return(sdpos); }; diff --git a/Firmware/ultralcd.cpp b/Firmware/ultralcd.cpp index 3fc5f5a6..78dddf2b 100755 --- a/Firmware/ultralcd.cpp +++ b/Firmware/ultralcd.cpp @@ -8473,10 +8473,8 @@ static void lcd_selftest_screen_step(int _row, int _col, int _state, const char /** Menu action functions **/ static bool check_file(const char* filename) { - return true; // @@TODO - if (farm_mode) return true; - card.openFileFilteredGcode((char*)filename, true); //@@TODO + card.openFileFilteredGcode((char*)filename, true); bool result = false; const uint32_t filesize = card.getFileSize(); uint32_t startPos = 0;