Last Modified: | March 29, 2014, at 08:34 AM |
By: | robtillaart |
Platforms: | UNO (others not tested) |
The functions presented here are experimental and possibly do not implement all the details of IEEE754, so use with care.
This page describes a number of functions and code snippets to manipulate IEEE754 32 bit floats. It contains functions to extract the sign, exponent and mantissa so one can manipulate floats at the bit level. Code is also provided to shave CPU cycles off some elementary float functions.
One of the remarks often made about the Arduino is that it does not support IEEE754 64 bit doubles. Two functions on this page make it possible to pack a 32 bit float into, and unpack it from, an 8 byte array which represents an IEEE754 64 bit double. This allows the exchange of 64 bit doubles to and from the Arduino, e.g. over Serial, with the conversion done on the Arduino side.
In the current implementation the code is presented as plain utility functions. These might become wrapped into Classes in the future.
The first functionality the lib offers is IEEE754 datatypes. Every float consists of three parts: the sign, the exponent and the mantissa. For a full description please see: 32 bit float -- 64 bit double
struct IEEEfloat; struct IEEEdouble; struct _DBL; // Arduino variant for IEEEdouble union _FLOATCONV; // mapping float to other representations union _DBLCONV; // mapping _DBL to other representations
There are two functions for debugging. There is no dump function for a double (yet).
// Debug helpers; both print sign, exponent and mantissa in HEX over Serial.
void dumpFloat(float number);       // dump the parts of a 32 bit float
void dumpDBL(struct _DBL dbl);      // dump the parts of an (Arduino) _DBL
There are two functions to convert between a 32 bit float and a packed byte array which represents a double. This can be used e.g. to send a float to a PC where it is received as a 64 bit double, or the other way around. As 64 bit has more significant digits, the conversion from 32->64 bit will work quite well, but the conversion from 64->32 will lose significant digits and the value can be too large to be represented in 32 bits. In this latter case NAN will be returned.
Note that these conversion functions are experimental.
// Pack a 32 bit float into the 8 bytes (bar) of an IEEE754 64 bit double.
void float2DoublePacked(float number, byte* bar, int byteOrder=LSBFIRST);

// Unpack the 8 bytes (bar) of an IEEE754 64 bit double into a 32 bit float.
float doublePacked2Float(byte* bar, int byteOrder=LSBFIRST);
Note the communication is binary, not text-mode
// // FILE: sendDouble.ino // AUTHOR: Rob Tillaart // VERSION: 0.1.00 // PURPOSE: sends an expanded float as double to PC. // // Released to the public domain // #include <IEEE754tools.h> void setup() { Serial.begin(115200); } void loop() { float f = analogRead(A0) * 5.0 / 1024; sendDouble(f); delay(100); sendDouble(PI); // reference delay(100); } void sendDouble(float number) { byte x[8] = { 0,0,0,0, 0,0,0,0 }; float2DoublePacked(number, x); // simple dump, no handshake or packetizing for (int i=0; i<8;i++) Serial.write(x[i]); } // END OF FILE
import serial
from struct import unpack

# Serial port the Arduino is connected to; the baud rate matches the sketch.
ser = serial.Serial("COM31", 115200)


def h():
    """Read 8 bytes from the serial port and decode them as one double.

    The sketch sends the bytes least significant byte first, so the
    format is explicitly little-endian ('<d') instead of native order.
    Returns the 1-tuple produced by struct.unpack.
    """
    return unpack('<d', ser.read(8))


def main():
    # close and reopen the port before reading
    ser.close()
    ser.open()
    while True:
        # print(x) with a single argument behaves the same under
        # Python 2 and Python 3
        print(ser.inWaiting())
        print(h())


if __name__ == '__main__':
    main()
# END OF FILE
Note the communication is binary, not text-mode
// // FILE: echoDouble.ino // AUTHOR: Rob Tillaart // VERSION: 0.1.00 // PURPOSE: sends an expanded float as double to PC. // // Released to the public domain // #include <IEEE754tools.h> void setup() { Serial.begin(115200); } void loop() { float f = receiveDouble(); sendDouble(f+1); delay(100); } float receiveDouble() { byte x[8]; // wait for 8 bytes while (Serial.available() < 8); for (int i=0; i<8;i++) x[i] = Serial.read(); return doublePacked2Float(x); } void sendDouble(float number) { byte x[8] = { 0,0,0,0, 0,0,0,0 }; float2DoublePacked(number, x); // simple dump, no handshake or packetizing for (int i=0; i<8;i++) Serial.write(x[i]); } // END OF FILE
import serial
from struct import pack, unpack
from math import pi
import time

# Serial port the Arduino is connected to; the baud rate matches the sketch.
ser = serial.Serial("COM31", 115200)


def getDouble():
    """Read 8 bytes from the serial port and decode them as one double.

    The sketch sends the bytes least significant byte first, so the
    format is explicitly little-endian ('<d') instead of native order.
    Returns the 1-tuple produced by struct.unpack.
    """
    return unpack('<d', ser.read(8))


def sendDouble(f):
    """Send f to the Arduino as the 8 bytes of a little-endian double."""
    # Write the packed bytes in one call: indexing a bytes object yields
    # ints under Python 3, which must not be passed to ser.write().
    ser.write(pack('<d', f))


def main():
    ser.close()
    ser.open()
    # give the Arduino some time to 'boot'
    time.sleep(2.0)
    while True:
        sendDouble(pi)
        # print(x) with a single argument behaves the same under
        # Python 2 and Python 3
        print(getDouble())


if __name__ == '__main__':
    main()
# END OF FILE
To use the library, make a folder in your SKETCHBOOKPATH\libraries with the name IEEE754tools and put the IEEE754tools.h there.
Enjoy tinkering,
rob.tillaart@removethisgmail.com
//
//    FILE: IEEE754tools.h
//  AUTHOR: Rob Tillaart
// VERSION: 0.1.00
// PURPOSE: IEEE754 tools
//
// https://playground.arduino.cc//Main/IEEE754tools
//
// Released to the public domain
// not tested, use with care
//

#ifndef IEEE754tools_h
#define IEEE754tools_h

#if defined(ARDUINO) && ARDUINO >= 100
#include "Arduino.h"
#define IEEE754_HAS_SERIAL
#elif defined(ARDUINO) || defined(__AVR__)
#include "WProgram.h"
#define IEEE754_HAS_SERIAL
#else
// No Arduino core available (e.g. unit testing on a PC): provide the few
// names this header needs. The dump functions need Serial and are left out.
#include <stdint.h>
typedef uint8_t byte;
#ifndef LSBFIRST
#define LSBFIRST 0
#endif
#ifndef MSBFIRST
#define MSBFIRST 1
#endif
#endif

#include <stdint.h>
#include <string.h>   // memcpy
#include <math.h>     // NAN

// (un)comment lines to configure functionality / size
//#define IEEE754_ENABLE_MSB
#define IEEE754_ENABLE_DUMP

// NOTE(review): the bit-field structs below mix underlying types; whether the
// fields end up packed exactly like the IEEE754 bit pattern is compiler
// specific (the original was only tried on UNO). The functions in this header
// therefore do not rely on these structs; they use shifts and masks instead.

// IEEE754 float layout;
struct IEEEfloat
{
  uint32_t m:23;
  uint8_t e:8;
  uint8_t s:1;
};

// IEEE754 double layout;
struct IEEEdouble
{
  uint64_t m:52;
  uint16_t e:11;
  uint8_t s:1;
};

// Arduino UNO double layout:
// the UNO has no 64 bit double, it is only able to map 23 bits of the mantisse
// a filler is added.
struct _DBL
{
  uint32_t filler:29;
  uint32_t m:23;
  uint16_t e:11;
  uint8_t s:1;
};

// for packing and unpacking a float
typedef union _FLOATCONV
{
  IEEEfloat p;
  float f;
  byte b[4];
} _FLOATCONV;

// for packing and unpacking a double
typedef union _DBLCONV
{
  // IEEEdouble p;
  _DBL p;
  double d;   // !! is a 32bit float for UNO.
  byte b[8];  // a packed double is 8 bytes (was declared [4] but indexed 0..7)
} _DBLCONV;

// returns the raw 32 bit pattern of a float.
// memcpy is used instead of a pointer cast to avoid aliasing problems.
static inline uint32_t ieee754FloatToBits(float number)
{
  uint32_t bits;
  memcpy(&bits, &number, sizeof(bits));
  return bits;
}

// returns the float that has the given raw 32 bit pattern.
static inline float ieee754BitsToFloat(uint32_t bits)
{
  float number;
  memcpy(&number, &bits, sizeof(number));
  return number;
}

#if defined(IEEE754_ENABLE_DUMP) && defined(IEEE754_HAS_SERIAL)
// print float components: sign, exponent (biased) and mantisse,
// in HEX, separated by tabs.
inline void dumpFloat(float number)
{
  const uint32_t bits = ieee754FloatToBits(number);
  Serial.print((uint8_t)(bits >> 31), HEX);
  Serial.print("\t");
  Serial.print((uint8_t)((bits >> 23) & 0xFF), HEX);
  Serial.print("\t");
  Serial.println((uint32_t)(bits & 0x007FFFFFUL), HEX);

  // Serial.print(" sign: "); Serial.print(x->s);
  // Serial.print("  exp: "); Serial.print(x->e);
  // Serial.print(" mant: "); Serial.println(x->m);
}

// print "double" components: sign, exponent (biased) and mantisse,
// in HEX, separated by tabs.
inline void dumpDBL(struct _DBL dbl)
{
  Serial.print(dbl.s, HEX);
  Serial.print("\t");
  Serial.print(dbl.e, HEX);
  Serial.print("\t");
  Serial.println(dbl.m, HEX);
}
#endif

//
// converts a float to a packed array of 8 bytes representing a 64 bit double
// restriction exponent and mantisse.
//
// number:    the float to convert
// bar:       array of 8 bytes that receives the double
// byteOrder: LSBFIRST or MSBFIRST; MSBFIRST is only honoured when
//            IEEE754_ENABLE_MSB is defined, otherwise LSBFIRST is used.
//
// zero, denormals, infinity and NaN are mapped onto their double
// counterparts; the 29 extra mantisse bits of the double are always 0.
//
inline void float2DoublePacked(float number, byte* bar, int byteOrder=LSBFIRST)
{
  const uint32_t bits = ieee754FloatToBits(number);
  const uint32_t sign = bits >> 31;
  const uint32_t fexp = (bits >> 23) & 0xFF;
  uint32_t m = bits & 0x007FFFFFUL;

  uint32_t dexp;
  if (fexp == 0xFF)
  {
    dexp = 0x7FF;                 // infinity or NaN
  }
  else if (fexp != 0)
  {
    dexp = fexp + (1023 - 127);   // exponent adjust
  }
  else if (m == 0)
  {
    dexp = 0;                     // +0 or -0
  }
  else
  {
    // denormal float: normalize, a double has range enough to hold it.
    dexp = 1 + (1023 - 127);
    while ((m & 0x00800000UL) == 0)
    {
      m <<= 1;
      dexp--;
    }
    m &= 0x007FFFFFUL;            // drop the now explicit leading 1
  }

  // the 23 mantisse bits are the top bits of the 52 bit double mantisse:
  // 20 of them go in the high word, the remaining 3 in the low word.
  const uint32_t hi = (sign << 31) | (dexp << 20) | (m >> 3);
  const uint32_t lo = (m & 0x07) << 29;

  byte le[8];                     // the double, least significant byte first
  for (uint8_t i = 0; i < 4; i++)
  {
    le[i]     = (byte)(lo >> (8 * i));
    le[i + 4] = (byte)(hi >> (8 * i));
  }

#ifdef IEEE754_ENABLE_MSB
  const bool reversed = (byteOrder != LSBFIRST);
#else
  const bool reversed = false;
  (void) byteOrder;
#endif
  for (uint8_t i = 0; i < 8; i++)
  {
    bar[i] = reversed ? le[7 - i] : le[i];
  }
}

//
// converts a packed array of bytes into a 32bit float.
// the mantisse is truncated to 23 bits.
//
// bar:       array of 8 bytes holding an IEEE754 64 bit double
// byteOrder: LSBFIRST or MSBFIRST; MSBFIRST is only honoured when
//            IEEE754_ENABLE_MSB is defined, otherwise LSBFIRST is used.
//
// returns NAN if the value is too large for a float. values too small for
// a float become a (truncated) denormal or a signed zero.
// zero, infinity and NaN are mapped onto their float counterparts.
//
inline float doublePacked2Float(byte* bar, int byteOrder=LSBFIRST)
{
#ifdef IEEE754_ENABLE_MSB
  const bool reversed = (byteOrder != LSBFIRST);
#else
  const bool reversed = false;
  (void) byteOrder;
#endif

  uint32_t lo = 0;
  uint32_t hi = 0;
  for (uint8_t i = 0; i < 4; i++)
  {
    lo |= (uint32_t) bar[reversed ? 7 - i : i]     << (8 * i);
    hi |= (uint32_t) bar[reversed ? 3 - i : i + 4] << (8 * i);
  }

  const uint32_t sign = hi & 0x80000000UL;
  const uint16_t dexp = (uint16_t)((hi >> 20) & 0x7FF);
  // top 23 bits of the 52 bit mantisse; the other 29 bits are clipped
  uint32_t m = ((hi & 0x000FFFFFUL) << 3) | (lo >> 29);
  const bool clipped = (lo & 0x1FFFFFFFUL) != 0;

  if (dexp == 0x7FF)
  {
    // infinity or NaN; a NaN must keep a non-zero mantisse after clipping
    if (m == 0 && clipped) m = 0x00400000UL;
    return ieee754BitsToFloat(sign | 0x7F800000UL | m);
  }
  if (dexp == 0)
  {
    // zero (or a double denormal, far below the float range)
    return ieee754BitsToFloat(sign);
  }

  const int e = (int) dexp - 1023 + 127;   // exponent adjust
  if (e >= 255)
  {
    return NAN;                            // exponent overflow
  }
  if (e >= 1)
  {
    return ieee754BitsToFloat(sign | ((uint32_t) e << 23) | m);
  }
  if (e >= -22)
  {
    // float denormal: make the leading 1 explicit and shift it into place
    return ieee754BitsToFloat(sign | ((m | 0x00800000UL) >> (1 - e)));
  }
  return ieee754BitsToFloat(sign);         // underflow to signed zero
}

//
// NOT TESTED FUNCTIONS
//

// returns the sign bit: 1 for negative, 0 otherwise.
inline uint8_t getSign(float number)
{
  return (uint8_t)(ieee754FloatToBits(number) >> 31);
}

// returns the exponent with the bias (127) removed.
inline int getExponent(float number)
{
  return (int)((ieee754FloatToBits(number) >> 23) & 0xFF) - 127;
}

// returns the 23 stored mantisse bits (without the implicit leading 1).
inline uint32_t getMantisse(float number)
{
  return ieee754FloatToBits(number) & 0x007FFFFFUL;
}

/*
// ONELINERS to speed up some specific 32 bit float math

// *(((byte*) &number)+3) &= 0x7F;            // number == fabs(number);
// x = *(((byte*) &number)+3) & 0x7F;         // x = fabs(number);
// GAIN = factor 2

// *(((byte*) &number)+3) |= 0x80;            // number = -fabs(number);
// x = *(((byte*) &number)+3) | 0x80;         // x = -fabs(number);
// GAIN = factor 2

// *(((byte*) &number)+3) ^= 0x80;            // number = -number;
// x = *(((byte*) &number)+3) ^ 0x80;         // x = -number;
// GAIN = factor 2

// s = *(((uint8_t*) &number)+3) & 0x80;      // s = sign(number);
// if ( *(((byte*) &number)+3) & 0x80) x=2;   // if (number < 0) x=2;
// GAIN = factor 5

int getExponent(float number)
{
  uint8_t e = (*(((uint8_t*) &number)+3) & 0x7F) << 1;
  if (*(((uint8_t*) &number)+2) & 0x80) e++;
  return e;
}
*/

#endif
// END OF FILE