/* ======================================================================== */
/*  LTO Flash / JLP Extended ISA                                            */
/* ======================================================================== */

#include "config.h"
#include "periph/periph.h"
#include "cp1600/cp1600.h"
#include "cp1600/op_decode.h"
#include "cp1600/op_exec_ext.h"

/* ------------------------------------------------------------------------ */
/*  Every table-dispatched extended operation has this shape:               */
/*                                                                          */
/*      s1, s2      16-bit source operands.                                 */
/*      dl          Low result word.  Some operations also read the value   */
/*                  already stored here as a third input.                   */
/*      dh          High result word.  Operations that produce only a       */
/*                  16-bit result leave it untouched.                       */
/* ------------------------------------------------------------------------ */
typedef void fnx_t(const uint16_t, const uint16_t,
                   uint16_t *const, uint16_t *const);

#define FN(x) LOCAL void fnx_##x( const uint16_t s1, const uint16_t s2, \
                                  uint16_t *const dl, uint16_t *const dh )

/* ------------------------------------------------------------------------ */
/*  Small helpers.  The "s" suffixed variants byte-swap the argument first  */
/*  and are used by the "fx" flavored operations.                           */
/* ------------------------------------------------------------------------ */

/* Combine two bytes into a 16-bit word, 'hi' in bits 15..8. */
LOCAL INLINE uint16_t pack(uint8_t hi, uint8_t lo)
{
    return ((uint16_t)hi << 8) | lo;
}

/* Exchange the upper and lower bytes of a 16-bit word. */
LOCAL INLINE uint16_t swap(const uint16_t x)
{
    return (x >> 8) | (x << 8);
}

/* Sign-extend a 16-bit word to 32 bits. */
LOCAL INLINE int32_t as_i32(const uint16_t x)
{
    return (int32_t)(x ^ 0x8000u) - 0x8000;
}

/* Byte-swap, then sign-extend to 32 bits. */
LOCAL INLINE int32_t as_i32s(const uint16_t fx)
{
    const uint16_t x = swap(fx);
    return (int32_t)(x ^ 0x8000u) - 0x8000;
}

/* Reinterpret a 16-bit word as a signed 16-bit value. */
LOCAL INLINE int16_t as_i16(const uint16_t x)
{
    return (x & 0x7FFF) - (x & 0x8000);
}

/* Byte-swap, then reinterpret as a signed 16-bit value. */
LOCAL INLINE int16_t as_i16s(const uint16_t fx)
{
    const uint16_t x = swap(fx);
    return (x & 0x7FFF) - (x & 0x8000);
}

/* Zero-extending / identity views, with and without the byte swap. */
LOCAL INLINE uint32_t as_u32 (const uint16_t x) { return x;       }
LOCAL INLINE uint32_t as_u32s(const uint16_t x) { return swap(x); }
LOCAL INLINE uint16_t as_u16 (const uint16_t x) { return x;       }
LOCAL INLINE uint16_t as_u16s(const uint16_t x) { return swap(x); }

/* ------------------------------------------------------------------------ */
/*  Add / subtract, plain and byte-swapped ("fx"), 16-bit result in dl.     */
/* ------------------------------------------------------------------------ */
FN(add3)    { UNUSED(dh); *dl =   s1 + s2;                          }
FN(nadd)    { UNUSED(dh); *dl = -(s1 + s2);                         }
FN(sub3)    { UNUSED(dh); *dl =   s1 - s2;                          }
FN(addfx)   { UNUSED(dh); *dl = swap( swap(s1) + swap(s2));         }
FN(naddfx)  { UNUSED(dh); *dl = swap(-swap(s1) - swap(s2));         }
FN(subfx)   { UNUSED(dh); *dl = swap( swap(s1) - swap(s2));         }

/* ------------------------------------------------------------------------ */
/*  Bitwise logic, 16-bit result in dl.                                     */
/* ------------------------------------------------------------------------ */
FN(and3)    { UNUSED(dh); *dl =    s1 & s2;                         }
FN(nand)    { UNUSED(dh); *dl = ~( s1 & s2);                        }
FN(andn)    { UNUSED(dh); *dl =   ~s1 & s2;                         }
FN(orn)     { UNUSED(dh); *dl = ~(~s1 & s2);                        }
FN(or3)     { UNUSED(dh); *dl =    s1 | s2;                         }
FN(nor)     { UNUSED(dh); *dl = ~( s1 | s2);                        }
FN(xor3)    { UNUSED(dh); *dl =    s1 ^ s2;                         }
FN(xnor)    { UNUSED(dh); *dl = ~( s1 ^ s2);                        }

/* ------------------------------------------------------------------------ */
/*  Shifts.  The shift amount is the low 4 bits of s2.  dl receives the     */
/*  shifted value; dh receives the bits that were shifted out.              */
/* ------------------------------------------------------------------------ */

/* Shift left; dh is filled from the sign-extended view of s1. */
FN(shl3)
{
    const int s = s2 & 0xF;
    if (!s) { *dl = s1 << s; *dh = as_i32(s1) >> 15;       }
    else    { *dl = s1 << s; *dh = as_i32(s1) >> (16 - s); }
}

/* Shift left; dh is filled from the zero-extended view of s1. */
FN(shlu)
{
    const int s = s2 & 0xF;
    if (!s) { *dl = s1 << s; *dh = 0;                      }
    else    { *dl = s1 << s; *dh = as_u32(s1) >> (16 - s); }
}

/* Arithmetic shift right; shifted-out bits land at the top of dh. */
FN(shr3)
{
    const int s = s2 & 0xF;
    if (!s) { *dl = as_i32(s1) >> s; *dh = 0;               }
    else    { *dl = as_i32(s1) >> s; *dh = s1 << (16 - s);  }
}

/* Logical shift right; shifted-out bits land at the top of dh. */
FN(shru)
{
    const int s = s2 & 0xF;
    if (!s) { *dl = as_u32(s1) >> s; *dh = 0;               }
    else    { *dl = as_u32(s1) >> s; *dh = s1 << (16 - s);  }
}

/* Shift each byte of s1 left independently; shift amount clamps at 8. */
FN(bshlu)
{
    const int s  = (s2 & 0xF) < 8 ? s2 & 0xF : 8;
    const int ns = 8 - s;
    const uint8_t dlh = ((s1 >> 8) & 0xFF) << s;
    const uint8_t dll = ( s1       & 0xFF) << s;
    const uint8_t dhh = ((s1 >> 8) & 0xFF) >> ns;
    const uint8_t dhl = ( s1       & 0xFF) >> ns;
    *dl = pack(dlh, dll);
    *dh = pack(dhh, dhl);
}

/* Shift each byte of s1 right independently; shift amount clamps at 8. */
FN(bshru)
{
    const int s  = (s2 & 0xF) < 8 ? s2 & 0xF : 8;
    const int ns = 8 - s;
    const uint8_t dlh = ((s1 >> 8) & 0xFF) >> s;
    const uint8_t dll = ( s1       & 0xFF) >> s;
    const uint8_t dhh = ((s1 >> 8) & 0xFF) << ns;
    const uint8_t dhl = ( s1       & 0xFF) << ns;
    *dl = pack(dlh, dll);
    *dh = pack(dhh, dhl);
}

/* Rotate left by s2 & 0xF. */
FN(rol)
{
    const int s = s2 & 0xF, ns = 16 - s;
    UNUSED(dh);
    *dl = (((uint32_t)s1) << s) | (s1 >> ns);
}

/* Rotate right by s2 & 0xF. */
FN(ror)
{
    const int s = s2 & 0xF, ns = 16 - s;
    UNUSED(dh);
    *dl = (s1 >> s) | (((uint32_t)s1) << ns);
}

/* ------------------------------------------------------------------------ */
/*  16 x 16 multiplies producing a 32-bit product in dh:dl.  The suffix     */
/*  gives the signedness of s1 and s2 respectively.                         */
/* ------------------------------------------------------------------------ */
FN(mpyss)
{
    const int32_t p = as_i32(s1) * as_i32(s2);
    *dl = p & 0xFFFF;
    *dh = (p >> 16) & 0xFFFF;
}

FN(mpysu)
{
    const int32_t p = as_i32(s1) * as_u32(s2);
    *dl = p & 0xFFFF;
    *dh = (p >> 16) & 0xFFFF;
}

FN(mpyus)
{
    const int32_t p = as_u32(s1) * as_i32(s2);
    *dl = p & 0xFFFF;
    *dh = (p >> 16) & 0xFFFF;
}

FN(mpyuu)
{
    const uint32_t p = as_u32(s1) * as_u32(s2);
    *dl = p & 0xFFFF;
    *dh = (p >> 16) & 0xFFFF;
}

/* Low 16 bits of the product only. */
FN(mpy16)
{
    UNUSED(dh);
    /* Multiply as uint32_t:  uint16_t operands would otherwise promote to  */
    /* signed int, and 0xFFFF * 0xFFFF overflows a 32-bit int.              */
    *dl = (uint16_t)((uint32_t)s1 * s2);
}

/* ------------------------------------------------------------------------ */
/*  Byte-swapped ("fx") multiplies.  Operands are byte-swapped before the   */
/*  multiply; dl receives product bits 15..8 in its upper byte and product  */
/*  bits 23..16 in its lower byte.                                          */
/* ------------------------------------------------------------------------ */
FN(mpyfxss)
{
    const int32_t p = as_i32s(s1) * as_i32s(s2);
    *dl = (p & 0xFF00) | ((p >> 16) & 0x00FF);
    UNUSED(dh);
}

FN(mpyfxsu)
{
    const int32_t p = as_i32s(s1) * as_u32s(s2);
    *dl = (p & 0xFF00) | ((p >> 16) & 0x00FF);
    UNUSED(dh);
}

FN(mpyfxus)
{
    const int32_t p = as_u32s(s1) * as_i32s(s2);
    *dl = (p & 0xFF00) | ((p >> 16) & 0x00FF);
    UNUSED(dh);
}

FN(mpyfxuu)
{
    const uint32_t p = as_u32s(s1) * as_u32s(s2);
    *dl = (p & 0xFF00) | ((p >> 16) & 0x00FF);
    UNUSED(dh);
}

/* ------------------------------------------------------------------------ */
/*  16-bit divides.  dl = quotient, dh = remainder.  Division by zero (and  */
/*  quotient overflow in the "fx" forms) yields a fixed saturation value    */
/*  in both outputs.                                                        */
/* ------------------------------------------------------------------------ */
FN(divs)
{
    const int16_t i1 = as_i16(s1), i2 = as_i16(s2);
    if (s2) { *dl = i1 / i2; *dh = i1 % i2; }
    else    { *dl = 0x7FFF;  *dh = 0x7FFF;  }
}

FN(divu)
{
    if (s2) { *dl = s1 / s2; *dh = s1 % s2; }
    else    { *dl = 0xFFFF;  *dh = 0xFFFF;  }
}

/* Byte-swapped signed divide; the dividend is pre-shifted left by 8. */
FN(divfxs)
{
    const int32_t ss1 = as_i32(swap(s1));
    const int32_t i1  = SLS32(ss1, 8), i2 = as_i32(swap(s2));

    *dl = swap(0x7FFF);
    *dh = swap(0x7FFF);

    if (s2)
    {
        const int32_t q = i1 / i2;
        const int32_t r = i1 % i2;
        if (q >= -0x8000 && q <= 0x7FFF)
        {
            *dl = swap(q);
            *dh = swap(r);
        }
    }
}

/* Byte-swapped unsigned divide; the dividend is pre-shifted left by 8. */
FN(divfxu)
{
    const uint32_t i1 = as_u32(swap(s1)) << 8, i2 = as_u32(swap(s2));

    *dl = swap(0xFFFF);
    *dh = swap(0xFFFF);

    if (s2)
    {
        const uint32_t q = i1 / i2;
        const uint32_t r = i1 % i2;
        if (q <= 0xFFFF)
        {
            *dl = swap(q);
            *dh = swap(r);
        }
    }
}

/* DIV32S/DIV32U need to be handled specially; see fnx_div32s/fnx_div32u. */

/* ------------------------------------------------------------------------ */
/*  Bit counting / reversal over the (s2 & 0xF) + 1 bits at one end of s1.  */
/* ------------------------------------------------------------------------ */

/* Count set bits among the leftmost (s2 & 0xF) + 1 bits of s1. */
FN(bitcntl)
{
    uint16_t value = s1;
    int to_count = (s2 & 0xF) + 1;
    int bits = 0;
    UNUSED(dh);

    while (to_count > 0)
    {
        if (value & 0x8000) bits++;
        value <<= 1;
        to_count--;
    }
    *dl = bits;
}

/* Count set bits among the rightmost (s2 & 0xF) + 1 bits of s1. */
FN(bitcntr)
{
    uint16_t value = s1;
    int to_count = (s2 & 0xF) + 1;
    int bits = 0;
    UNUSED(dh);

    while (to_count > 0)
    {
        if (value & 1) bits++;
        value >>= 1;
        to_count--;
    }
    *dl = bits;
}

/* Reverse the leftmost (s2 & 0xF) + 1 bits of s1; result is left-aligned. */
FN(bitrevl)
{
    uint16_t value_in  = s1;
    uint16_t value_out = 0;
    int to_reverse = (s2 & 0xF) + 1;
    UNUSED(dh);

    while (to_reverse > 0)
    {
        value_out >>= 1;
        if (value_in & 0x8000) value_out |= 0x8000;
        value_in <<= 1;
        to_reverse--;
    }
    *dl = value_out;
}

/* Reverse the rightmost (s2 & 0xF) + 1 bits of s1; result is right-aligned.*/
FN(bitrevr)
{
    uint16_t value_in  = s1;
    uint16_t value_out = 0;
    int to_reverse = (s2 & 0xF) + 1;
    UNUSED(dh);

    while (to_reverse > 0)
    {
        value_out <<= 1;
        if (value_in & 1) value_out |= 1;
        value_in >>= 1;
        to_reverse--;
    }
    *dl = value_out;
}

/* ------------------------------------------------------------------------ */
/*  Bit searches.  Scanning starts at bit (s2 & 0xF).  dl receives the bit  */
/*  index of the first match, or 0xFFFF if there is none.                   */
/* ------------------------------------------------------------------------ */

/* Scan downward toward bit 0 for a 1 bit. */
FN(lmo)
{
    int i;
    UNUSED(dh);
    for (i = s2 & 0xF; i >= 0; i--)
    {
        if (s1 & (1u << i)) { *dl = i; return; }
    }
    *dl = 0xFFFF;
    return;
}

/* Scan downward toward bit 0 for a 0 bit. */
FN(lmz)
{
    int i;
    UNUSED(dh);
    for (i = s2 & 0xF; i >= 0; i--)
    {
        if ((s1 & (1u << i)) == 0) { *dl = i; return; }
    }
    *dl = 0xFFFF;
    return;
}

/* Scan upward toward bit 15 for a 1 bit. */
FN(rmo)
{
    int i;
    UNUSED(dh);
    for (i = s2 & 0xF; i <= 15; i++)
    {
        if (s1 & (1u << i)) { *dl = i; return; }
    }
    *dl = 0xFFFF;
    return;
}

/* Scan upward toward bit 15 for a 0 bit. */
FN(rmz)
{
    int i;
    UNUSED(dh);
    for (i = s2 & 0xF; i <= 15; i++)
    {
        if ((s1 & (1u << i)) == 0) { *dl = i; return; }
    }
    *dl = 0xFFFF;
    return;
}

/* ------------------------------------------------------------------------ */
/*  Absolute difference:  dl = |s1 - s2|, signed / unsigned / byte-swapped. */
/* ------------------------------------------------------------------------ */
FN(subabs)
{
    const int diff    = as_i16(s1) - as_i16(s2);
    const int absdiff = diff < 0 ? -diff : diff;
    UNUSED(dh);
    *dl = absdiff & 0xFFFF;
}

FN(subabsu)
{
    const int diff    = as_u16(s1) - as_u16(s2);
    const int absdiff = diff < 0 ? -diff : diff;
    UNUSED(dh);
    *dl = absdiff & 0xFFFF;
}

FN(subabsfx)
{
    const int diff    = as_i16(swap(s1)) - as_i16(swap(s2));
    const int absdiff = diff < 0 ? -diff : diff;
    UNUSED(dh);
    *dl = swap(absdiff & 0xFFFF);
}

FN(subabsfxu)
{
    const int diff    = as_u16(swap(s1)) - as_u16(swap(s2));
    const int absdiff = diff < 0 ? -diff : diff;
    UNUSED(dh);
    *dl = swap(absdiff & 0xFFFF);
}

/* ------------------------------------------------------------------------ */
/*  Distance estimate:  dl = (123 * max + 51 * min) >> 7, where max and     */
/*  min are the larger and smaller operand magnitudes.                      */
/* ------------------------------------------------------------------------ */
FN(dist)
{
    const uint32_t u1 = abs(as_i32(s1));
    const uint32_t u2 = abs(as_i32(s2));
    const uint32_t mx = u1 > u2 ? u1 : u2;
    const uint32_t mn = u1 > u2 ? u2 : u1;
    UNUSED(dh);
    *dl = (mx * 123 + mn * 51) >> 7;
}

FN(distu)
{
    const uint32_t u1 = as_u32(s1);
    const uint32_t u2 = as_u32(s2);
    const uint32_t mx = u1 > u2 ? u1 : u2;
    const uint32_t mn = u1 > u2 ? u2 : u1;
    UNUSED(dh);
    *dl = (mx * 123 + mn * 51) >> 7;
}

FN(distfx)
{
    const uint32_t u1 = abs(as_i32(swap(s1)));
    const uint32_t u2 = abs(as_i32(swap(s2)));
    const uint32_t mx = u1 > u2 ? u1 : u2;
    const uint32_t mn = u1 > u2 ? u2 : u1;
    UNUSED(dh);
    *dl = swap((mx * 123 + mn * 51) >> 7);
}

FN(distfxu)
{
    const uint32_t u1 = as_u32(swap(s1));
    const uint32_t u2 = as_u32(swap(s2));
    const uint32_t mx = u1 > u2 ? u1 : u2;
    const uint32_t mn = u1 > u2 ? u2 : u1;
    UNUSED(dh);
    *dl = swap((mx * 123 + mn * 51) >> 7);
}

/* ------------------------------------------------------------------------ */
/*  Minimum / maximum.  The comparison uses the indicated view of the       */
/*  operands; the selected operand is written to dl unmodified.             */
/* ------------------------------------------------------------------------ */
FN(min)     { UNUSED(dh); *dl = as_i16 (s1) < as_i16 (s2) ? s1 : s2; }
FN(minfx)   { UNUSED(dh); *dl = as_i16s(s1) < as_i16s(s2) ? s1 : s2; }
FN(minu)    { UNUSED(dh); *dl = as_u16 (s1) < as_u16 (s2) ? s1 : s2; }
FN(minfxu)  { UNUSED(dh); *dl = as_u16s(s1) < as_u16s(s2) ? s1 : s2; }
FN(max)     { UNUSED(dh); *dl = as_i16 (s1) > as_i16 (s2) ? s1 : s2; }
FN(maxfx)   { UNUSED(dh); *dl = as_i16s(s1) > as_i16s(s2) ? s1 : s2; }
FN(maxu)    { UNUSED(dh); *dl = as_u16 (s1) > as_u16 (s2) ? s1 : s2; }
FN(maxfxu)  { UNUSED(dh); *dl = as_u16s(s1) > as_u16s(s2) ? s1 : s2; }

/* ------------------------------------------------------------------------ */
/*  Circular add / subtract.  Only the low (s2 & 0xF) bits of *dl are       */
/*  updated with (*dl +/- s1); the remaining upper bits are preserved.      */
/* ------------------------------------------------------------------------ */
FN(addcirc)
{
    const uint16_t mask = (1u << (s2 & 0xF)) - 1;
    const uint16_t sum  = (*dl + s1) & mask;
    const uint16_t keep = *dl & ~mask;
    UNUSED(dh);
    *dl = keep | sum;
}

FN(subcirc)
{
    const uint16_t mask = (1u << (s2 & 0xF)) - 1;
    const uint16_t sum  = (*dl - s1) & mask;
    const uint16_t keep = *dl & ~mask;
    UNUSED(dh);
    *dl = keep | sum;
}

/* ------------------------------------------------------------------------ */
/*  Byte packing.                                                           */
/* ------------------------------------------------------------------------ */
FN(repack)
{
    *dl = pack(s1 & 0xFF, s2 & 0xFF);
    *dh = pack(s1 >> 8,   s2 >> 8);
}

FN(packl)   { UNUSED(dh); *dl = pack(s1 & 0xFF, s2 & 0xFF);          }
FN(packh)   { UNUSED(dh); *dl = pack(s1 >> 8,   s2 >> 8);            }
FN(packlh)  { UNUSED(dh); *dl = pack(s1 & 0xFF, s2 >> 8);            }

/* ------------------------------------------------------------------------ */
/*  Single-bit toggle / set / clear.  The bit number is s2 & 0xF.           */
/* ------------------------------------------------------------------------ */
FN(btog)    { UNUSED(dh); *dl = s1 ^  (1u << (s2 & 0xF));            }
FN(bset)    { UNUSED(dh); *dl = s1 |  (1u << (s2 & 0xF));            }
FN(bclr)    { UNUSED(dh); *dl = s1 & ~(1u << (s2 & 0xF));            }

/* ------------------------------------------------------------------------ */
/*  Comparisons.  The result is 0xFFFF when true and 0 when false.  The     */
/*  forms ending in "a" AND the result into the existing value of *dl.      */
/* ------------------------------------------------------------------------ */
FN(cmpltu)    { UNUSED(dh); *dl  = as_u16(s1)  <  as_u16(s2)  ? 0xFFFFu : 0; }
FN(cmpleu)    { UNUSED(dh); *dl  = as_u16(s1)  <= as_u16(s2)  ? 0xFFFFu : 0; }
FN(cmpltua)   { UNUSED(dh); *dl &= as_u16(s1)  <  as_u16(s2)  ? 0xFFFFu : 0; }
FN(cmpleua)   { UNUSED(dh); *dl &= as_u16(s1)  <= as_u16(s2)  ? 0xFFFFu : 0; }
FN(cmpltfxu)  { UNUSED(dh); *dl  = as_u16s(s1) <  as_u16s(s2) ? 0xFFFFu : 0; }
FN(cmplefxu)  { UNUSED(dh); *dl  = as_u16s(s1) <= as_u16s(s2) ? 0xFFFFu : 0; }
FN(cmpltfxua) { UNUSED(dh); *dl &= as_u16s(s1) <  as_u16s(s2) ? 0xFFFFu : 0; }
FN(cmplefxua) { UNUSED(dh); *dl &= as_u16s(s1) <= as_u16s(s2) ? 0xFFFFu : 0; }

FN(cmplt)     { UNUSED(dh); *dl  = as_i16(s1)  <  as_i16(s2)  ? 0xFFFFu : 0; }
FN(cmple)     { UNUSED(dh); *dl  = as_i16(s1)  <= as_i16(s2)  ? 0xFFFFu : 0; }
FN(cmplta)    { UNUSED(dh); *dl &= as_i16(s1)  <  as_i16(s2)  ? 0xFFFFu : 0; }
FN(cmplea)    { UNUSED(dh); *dl &= as_i16(s1)  <= as_i16(s2)  ? 0xFFFFu : 0; }
FN(cmpltfx)   { UNUSED(dh); *dl  = as_i16s(s1) <  as_i16s(s2) ? 0xFFFFu : 0; }
FN(cmplefx)   { UNUSED(dh); *dl  = as_i16s(s1) <= as_i16s(s2) ? 0xFFFFu : 0; }
FN(cmpltfxa)  { UNUSED(dh); *dl &= as_i16s(s1) <  as_i16s(s2) ? 0xFFFFu : 0; }
FN(cmplefxa)  { UNUSED(dh); *dl &= as_i16s(s1) <= as_i16s(s2) ? 0xFFFFu : 0; }

FN(cmpeq)     { UNUSED(dh); *dl  = s1 == s2 ? 0xFFFFu : 0;           }
FN(cmpne)     { UNUSED(dh); *dl  = s1 != s2 ? 0xFFFFu : 0;           }
FN(cmpeqa)    { UNUSED(dh); *dl &= s1 == s2 ? 0xFFFFu : 0;           }
FN(cmpnea)    { UNUSED(dh); *dl &= s1 != s2 ? 0xFFFFu : 0;           }

/* ------------------------------------------------------------------------ */
/*  Bound:  clamp the existing value of *dl to the range spanned by s1 and  */
/*  s2 (in either order), under the indicated view of the operands.         */
/* ------------------------------------------------------------------------ */
FN(bound)
{
    const int16_t i1 = as_i16(s1), i2 = as_i16(s2), id = as_i16(*dl);
    const int16_t mn = i1 < i2 ? i1 : i2;
    const int16_t mx = i1 > i2 ? i1 : i2;
    const int16_t cl = mn > id ? mn : id;
    const int16_t ch = cl > mx ? mx : cl;
    *dl = ch;
    UNUSED(dh);
}

FN(boundu)
{
    const uint16_t i1 = as_u16(s1), i2 = as_u16(s2), id = as_u16(*dl);
    const uint16_t mn = i1 < i2 ? i1 : i2;
    const uint16_t mx = i1 > i2 ? i1 : i2;
    const uint16_t cl = mn > id ? mn : id;
    const uint16_t ch = cl > mx ? mx : cl;
    *dl = ch;
    UNUSED(dh);
}

FN(boundfx)
{
    const int16_t i1 = as_i16s(s1), i2 = as_i16s(s2), id = as_i16s(*dl);
    const int16_t mn = i1 < i2 ? i1 : i2;
    const int16_t mx = i1 > i2 ? i1 : i2;
    const int16_t cl = mn > id ? mn : id;
    const int16_t ch = cl > mx ? mx : cl;
    *dl = swap(ch);
    UNUSED(dh);
}

FN(boundfxu)
{
    const uint16_t i1 = as_u16s(s1), i2 = as_u16s(s2), id = as_u16s(*dl);
    const uint16_t mn = i1 < i2 ? i1 : i2;
    const uint16_t mx = i1 > i2 ? i1 : i2;
    const uint16_t cl = mn > id ? mn : id;
    const uint16_t ch = cl > mx ? mx : cl;
    *dl = swap(ch);
    UNUSED(dh);
}

/* ------------------------------------------------------------------------ */
/*  AAL / AAH:  take the low (AAL) or high (AAH) byte of s1, subtract       */
/*  0x20, keep 9 bits, multiply by 8, and add s2.                           */
/* ------------------------------------------------------------------------ */
FN(aal)
{
    UNUSED(dh);
    *dl = (((( s1     & 0xFF) - 0x20) & 0x1FF) << 3) + s2;
}

FN(aah)
{
    UNUSED(dh);
    *dl = (((((s1>>8) & 0xFF) - 0x20) & 0x1FF) << 3) + s2;
}

/* ------------------------------------------------------------------------ */
/*  Sum of squares:  dh:dl = s1*s1 + s2*s2, saturating at 0xFFFFFFFF.       */
/* ------------------------------------------------------------------------ */
FN(sumsq)
{
    const int64_t l1 = as_i32(s1), l2 = as_i32(s2);
    const int64_t p  = l1 * l1 + l2 * l2;

    *dl = 0xFFFF;
    *dh = 0xFFFF;
    if (p < 0xFFFFFFFFll)
    {
        *dl = p & 0xFFFF;
        *dh = (p >> 16) & 0xFFFF;
    }
}

FN(sumsqu)
{
    const int64_t l1 = as_u32(s1), l2 = as_u32(s2);
    const int64_t p  = l1 * l1 + l2 * l2;

    *dl = 0xFFFF;
    *dh = 0xFFFF;
    if (p < 0xFFFFFFFFll)
    {
        *dl = p & 0xFFFF;
        *dh = (p >> 16) & 0xFFFF;
    }
}

/* Byte-swapped signed form: feeds operand magnitudes to fnx_sumsqu. */
FN(sumsqfx)
{
    fnx_sumsqu( as_i16s(s1) < 0 ? -swap(s1) : swap(s1),
                as_i16s(s2) < 0 ? -swap(s2) : swap(s2),
                dl, dh );
}

FN(sumsqfxu)
{
    fnx_sumsqu( swap(s1), swap(s2), dl, dh );
}

/* ------------------------------------------------------------------------ */
/*  Integer square root, computed one result bit at a time from the MSB.    */
/* ------------------------------------------------------------------------ */
LOCAL INLINE uint32_t isqrt_helper(uint32_t x)
{
    uint32_t guess = 0, bit;

    for (bit = 0x8000u; bit != 0; bit >>= 1)
    {
        guess |= bit;                   /* Tentatively set this bit...      */
        if ( guess * guess > x )
            guess ^= bit;               /* ...and back it out if too large. */
    }
    return guess;
}

FN(isqrt)
{
    UNUSED(dh);
    UNUSED(s2);
    *dl = (uint16_t)isqrt_helper(s1);
}

/* Note: operates on s2.  This is the S-bit alternate of fnx_isqrt in the   */
/* dispatch table, and the dispatcher swaps src1/src2 when the S bit is set.*/
FN(isqrtfx)
{
    UNUSED(dh);
    UNUSED(s1);
    *dl = swap((uint16_t)isqrt_helper(256*swap(s2)));
}

/* ------------------------------------------------------------------------ */
/*  ATAN2:  Quantize the direction of the vector (s1, s2) = (x, y) into     */
/*  one of 16 values, returned in dl.                                       */
/* ------------------------------------------------------------------------ */
FN(atan2)
{
    int16_t x = as_i16(s1), y = as_i16(s2);
    uint16_t ux, uy;
    uint16_t a = 0;

    // Flip around X axis
    if (y < 0) { y = -y; a ^= 0x1F; }
    uy = y;

    // Flip around Y axis
    if (x < 0) { x = -x; a ^= 0xF; }
    ux = x;

    // Flip around 45-degree line
    if (uy > ux)
    {
        uint16_t ut = ux;
        ux = uy;
        uy = ut;
        a ^= 0x7;
    }

    // Maximize precision for fxpt arith: slide leading 1 to top
    if (ux)
        while ((ux & 0x8000) == 0)
        {
            ux <<= 1;
            uy <<= 1;
        }

    // Are we above or below 22.5 degree line?  Slope approx 0.41421
    uint16_t nx = (ux * 0x6A0A) >> 16;
    if (uy > nx)
    {
        // Above: toggle the bit
        a ^= 0x2;

        // Are we above or below the 33.75 degree line?  Slope approx 0.66818
        nx = (ux * 0xAB0Eu) >> 16;
        if (uy > nx) a ^= 0x1;
    } else
    {
        // Are we above or below the 11.25 degree line?  Slope approx 0.19891
        nx = (ux * 0x32EC) >> 16;
        if (uy > nx) a ^= 0x1;
    }

    // We now have an angle 0..31, with center points 11.25 degrees apart,
    // with the first center point at 5.625 degrees.  Collapse this to
    // 0..15, with the first center point at 0 deg.
    a = ((a + 1) >> 1) & 0xF;

    *dl = a;
    UNUSED(dh);
}

FN(atan2fx)
{
    fnx_atan2(swap(s1),swap(s2),dl,dh);
}

/* ------------------------------------------------------------------------ */
/*  I2BCD:  Convert the 32-bit integer s1:s2 to 8 BCD digits in dh:dl.      */
/*  Values of 99999999 or more produce 0x9999:0x9999.                       */
/* ------------------------------------------------------------------------ */
FN(i2bcd)
{
    uint32_t value = (as_u32(s1) << 16) | s2;

    if ( value >= 99999999 )
    {
        *dh = 0x9999;
        *dl = 0x9999;
        return;
    }

    const uint32_t digit7 = ((value / 10000000) % 10) << 12;
    const uint32_t digit6 = ((value / 1000000 ) % 10) <<  8;
    const uint32_t digit5 = ((value / 100000  ) % 10) <<  4;
    const uint32_t digit4 = ((value / 10000   ) % 10) <<  0;
    const uint32_t digit3 = ((value / 1000    ) % 10) << 12;
    const uint32_t digit2 = ((value / 100     ) % 10) <<  8;
    const uint32_t digit1 = ((value / 10      ) % 10) <<  4;
    const uint32_t digit0 = ((value / 1       ) % 10) <<  0;

    *dh = digit7 | digit6 | digit5 | digit4;
    *dl = digit3 | digit2 | digit1 | digit0;
}

/* ------------------------------------------------------------------------ */
/*  BCD2I:  Convert the 8 BCD digits in s1:s2 to a 32-bit integer in dh:dl. */
/*  Each nibble contributes (nibble value) * (its power of ten).            */
/* ------------------------------------------------------------------------ */
FN(bcd2i)
{
    const uint32_t digit7 = ((s1 >> 12) & 0xF) * 10000000;
    const uint32_t digit6 = ((s1 >>  8) & 0xF) * 1000000;
    const uint32_t digit5 = ((s1 >>  4) & 0xF) * 100000;
    const uint32_t digit4 = ((s1 >>  0) & 0xF) * 10000;
    const uint32_t digit3 = ((s2 >> 12) & 0xF) * 1000;
    const uint32_t digit2 = ((s2 >>  8) & 0xF) * 100;
    const uint32_t digit1 = ((s2 >>  4) & 0xF) * 10;
    const uint32_t digit0 = ((s2 >>  0) & 0xF) * 1;
    const uint32_t rslt   = digit7 + digit6 + digit5 + digit4
                          + digit3 + digit2 + digit1 + digit0;

    *dh = rslt >> 16;
    *dl = rslt & 0xFFFF;
}

/* Divide by 10, rounding toward -oo.  Valid for -19 <= x <= 33 only. */
LOCAL int div_10( int x )
{
    int q = 0;

    assert(x <=  33);
    assert(x >= -19);

    while (x > 9) { q++; x -= 10; }
    while (x < 0) { q--; x += 10; }

    return q;
}

/* The Cadillac of BCD add/subtract implementations.                        */
/* Treats non-canonical digits A - F as values 10 - 15, consistently.       */
LOCAL void abcd_helper
(
    const uint16_t s1, const uint16_t s2,       // Inputs
    uint16_t *const dl, uint16_t *const dh,     // Outputs
    int ci_flag,                                // Examine carry in
    int co_flag,                                // Write carry out
    int addsub_flag                             // 1 == ADD, 0 == SUB
)
{
    // Carry/borrow in is signed.  3 LSBs are significant,
    // all others are ignored.  It is read from *dl when ci_flag is set.
    //
    //   011  +3     111  -1
    //   010  +2     110  -2
    //   001  +1     101  -3
    //   000   0     100  -4
    //
    // Maximum carry out of +3 arises from F + F + 3 = 33
    // Maximum borrow out of -2 arises from 0 - F - 4 = -19
    const int s1_d0 = (s1 >>  0) & 0xF, s2_d0 = (s2 >>  0) & 0xF;
    const int s1_d1 = (s1 >>  4) & 0xF, s2_d1 = (s2 >>  4) & 0xF;
    const int s1_d2 = (s1 >>  8) & 0xF, s2_d2 = (s2 >>  8) & 0xF;
    const int s1_d3 = (s1 >> 12) & 0xF, s2_d3 = (s2 >> 12) & 0xF;

    // Digit 0:  carry in is the sign-extended 3 LSBs of *dl, if enabled.
    const int ci_d0  = ci_flag ? ((*dl & 7) ^ 4) - 4 : 0;
    const int raw_d0 = (addsub_flag ? s1_d0 + s2_d0 : s1_d0 - s2_d0) + ci_d0;
    const int co_d0  = div_10( raw_d0 );
    const int digit0 = raw_d0 - co_d0 * 10;

    // Digits 1..3:  carry in is the carry out of the previous digit.
    const int ci_d1  = co_d0;
    const int raw_d1 = (addsub_flag ? s1_d1 + s2_d1 : s1_d1 - s2_d1) + ci_d1;
    const int co_d1  = div_10( raw_d1 );
    const int digit1 = raw_d1 - co_d1 * 10;

    const int ci_d2  = co_d1;
    const int raw_d2 = (addsub_flag ? s1_d2 + s2_d2 : s1_d2 - s2_d2) + ci_d2;
    const int co_d2  = div_10( raw_d2 );
    const int digit2 = raw_d2 - co_d2 * 10;

    const int ci_d3  = co_d2;
    const int raw_d3 = (addsub_flag ? s1_d3 + s2_d3 : s1_d3 - s2_d3) + ci_d3;
    const int co_d3  = div_10( raw_d3 );
    const int digit3 = raw_d3 - co_d3 * 10;

    *dl = (digit3 << 12) | (digit2 << 8) | (digit1 << 4) | digit0;

    if (co_flag)
        *dh = co_d3;
}

/*                                         carry-in, carry-out, add */
FN(abcd )   { abcd_helper(s1, s2, dl, dh, 0, 0, 1); }
FN(abcdl)   { abcd_helper(s1, s2, dl, dh, 0, 1, 1); }
FN(abcdm)   { abcd_helper(s1, s2, dl, dh, 1, 1, 1); }
FN(abcdh)   { abcd_helper(s1, s2, dl, dh, 1, 0, 1); }
FN(sbcd )   { abcd_helper(s1, s2, dl, dh, 0, 0, 0); }
FN(sbcdl)   { abcd_helper(s1, s2, dl, dh, 0, 1, 0); }
FN(sbcdm)   { abcd_helper(s1, s2, dl, dh, 1, 1, 0); }
FN(sbcdh)   { abcd_helper(s1, s2, dl, dh, 1, 0, 0); }

/* ------------------------------------------------------------------------ */
/*  Extended-precision add / subtract.  The "m" and "h" forms fold in the   */
/*  existing value of *dl; the "h" forms do not write dh.                   */
/* ------------------------------------------------------------------------ */
FN(adds)
{
    const int32_t rslt = as_i32(s1) + as_i32(s2);
    *dl = rslt & 0xFFFF;
    *dh = rslt >> 16;
}

FN(addu)
{
    const int32_t rslt = as_u32(s1) + as_u32(s2);
    *dl = rslt & 0xFFFF;
    *dh = rslt >> 16;
}

FN(addm)
{
    const int32_t rslt = as_u32(s1) + as_u32(s2) + as_u32(*dl);
    *dl = rslt & 0xFFFF;
    *dh = rslt >> 16;
}

FN(addh)
{
    const int32_t rslt = as_u32(s1) + as_u32(s2) + as_u32(*dl);
    *dl = rslt & 0xFFFF;
    UNUSED(dh);
}

FN(subs)
{
    const int32_t rslt = as_i32(s1) - as_i32(s2);
    *dl = rslt & 0xFFFF;
    *dh = rslt >> 16;
}

FN(subu)
{
    const int32_t rslt = as_u32(s1) - as_u32(s2);
    *dl = rslt & 0xFFFF;
    *dh = rslt >> 16;
}

FN(subm)
{
    const int32_t rslt = as_u32(s1) - as_u32(s2) + as_i32(*dl);
    *dl = rslt & 0xFFFF;
    *dh = rslt >> 16;
}

FN(subh)
{
    const int32_t rslt = as_u32(s1) - as_u32(s2) + as_i32(*dl);
    *dl = rslt & 0xFFFF;
    UNUSED(dh);
}

/* Double move:  dh = s1, dl = s2. */
FN(dmov)    { *dh = s1;      *dl = s2;      }

/* Sum to dh, difference to dl. */
FN(addsub)  { *dh = s1 + s2; *dl = s1 - s2; }

/* ------------------------------------------------------------------------ */
/*  Dispatch table, indexed by [opcode][S bit].  The second column is the   */
/*  function used when the S bit is set; note that the dispatcher has       */
/*  already exchanged src1 and src2 in that case.                           */
/* ------------------------------------------------------------------------ */
LOCAL fnx_t *const fnx_tbl[][2] =
{
    { fnx_add3,         fnx_nadd        },  /* 0x00 */
    { fnx_addfx,        fnx_naddfx      },  /* 0x01 */
    { fnx_sub3,         fnx_sub3        },  /* 0x02 */
    { fnx_subfx,        fnx_subfx       },  /* 0x03 */
    { fnx_and3,         fnx_nand        },  /* 0x04 */
    { fnx_andn,         fnx_orn         },  /* 0x05 */
    { fnx_or3,          fnx_nor         },  /* 0x06 */
    { fnx_xor3,         fnx_xnor        },  /* 0x07 */

    { fnx_shl3,         fnx_shl3        },  /* 0x08 */
    { fnx_shlu,         fnx_shlu        },  /* 0x09 */
    { fnx_shr3,         fnx_shr3        },  /* 0x0A */
    { fnx_shru,         fnx_shru        },  /* 0x0B */
    { fnx_bshlu,        fnx_bshlu       },  /* 0x0C */
    { fnx_bshru,        fnx_bshru       },  /* 0x0D */
    { fnx_rol,          fnx_rol         },  /* 0x0E */
    { fnx_ror,          fnx_ror         },  /* 0x0F */

    { fnx_bitcntl,      fnx_bitcntl     },  /* 0x10 */
    { fnx_bitcntr,      fnx_bitcntr     },  /* 0x11 */
    { fnx_bitrevl,      fnx_bitrevl     },  /* 0x12 */
    { fnx_bitrevr,      fnx_bitrevr     },  /* 0x13 */
    { fnx_lmo,          fnx_lmo         },  /* 0x14 */
    { fnx_lmz,          fnx_lmz         },  /* 0x15 */
    { fnx_rmo,          fnx_rmo         },  /* 0x16 */
    { fnx_rmz,          fnx_rmz         },  /* 0x17 */

    { fnx_repack,       fnx_repack      },  /* 0x18 */
    { fnx_packl,        fnx_packl       },  /* 0x19 */
    { fnx_packh,        fnx_packh       },  /* 0x1A */
    { fnx_packlh,       fnx_packlh      },  /* 0x1B */
    { fnx_btog,         fnx_btog        },  /* 0x1C */
    { fnx_bset,         fnx_bset        },  /* 0x1D */
    { fnx_bclr,         fnx_bclr        },  /* 0x1E */
    { fnx_cmpeq,        fnx_cmpne       },  /* 0x1F */

    { fnx_cmpltu,       fnx_cmpltu      },  /* 0x20 */
    { fnx_cmpltfxu,     fnx_cmpltfxu    },  /* 0x21 */
    { fnx_cmpleu,       fnx_cmpleu      },  /* 0x22 */
    { fnx_cmplefxu,     fnx_cmplefxu    },  /* 0x23 */
    { fnx_cmpltua,      fnx_cmpltua     },  /* 0x24 */
    { fnx_cmpltfxua,    fnx_cmpltfxua   },  /* 0x25 */
    { fnx_cmpleua,      fnx_cmpleua     },  /* 0x26 */
    { fnx_cmplefxua,    fnx_cmplefxua   },  /* 0x27 */

    { fnx_cmplt,        fnx_cmplt       },  /* 0x28 */
    { fnx_cmpltfx,      fnx_cmpltfx     },  /* 0x29 */
    { fnx_cmple,        fnx_cmple       },  /* 0x2A */
    { fnx_cmplefx,      fnx_cmplefx     },  /* 0x2B */
    { fnx_cmplta,       fnx_cmplta      },  /* 0x2C */
    { fnx_cmpltfxa,     fnx_cmpltfxa    },  /* 0x2D */
    { fnx_cmplea,       fnx_cmplea      },  /* 0x2E */
    { fnx_cmplefxa,     fnx_cmplefxa    },  /* 0x2F */

    { fnx_min,          fnx_minu        },  /* 0x30 */
    { fnx_minfx,        fnx_minfxu      },  /* 0x31 */
    { fnx_max,          fnx_maxu        },  /* 0x32 */
    { fnx_maxfx,        fnx_maxfxu      },  /* 0x33 */
    { fnx_bound,        fnx_boundu      },  /* 0x34 */
    { fnx_boundfx,      fnx_boundfxu    },  /* 0x35 */
    { fnx_addcirc,      fnx_addcirc     },  /* 0x36 */
    { fnx_subcirc,      fnx_subcirc     },  /* 0x37 */

    { fnx_atan2,        fnx_atan2       },  /* 0x38 */
    { fnx_atan2fx,      fnx_atan2fx     },  /* 0x39 */
    { fnx_subabs,       fnx_subabsu     },  /* 0x3A */
    { fnx_subabsfx,     fnx_subabsfxu   },  /* 0x3B */
    { fnx_dist,         fnx_distu       },  /* 0x3C */
    { fnx_distfx,       fnx_distfxu     },  /* 0x3D */
    { fnx_sumsq,        fnx_sumsqu      },  /* 0x3E */
    { fnx_sumsqfx,      fnx_sumsqfxu    },  /* 0x3F */

    { fnx_mpyss,        fnx_mpyuu       },  /* 0x40 */
    { fnx_mpyfxss,      fnx_mpyfxuu     },  /* 0x41 */
    { fnx_mpysu,        fnx_mpysu       },  /* 0x42 */
    { fnx_mpyfxsu,      fnx_mpyfxsu     },  /* 0x43 */
    { fnx_mpyus,        fnx_mpyus       },  /* 0x44 */
    { fnx_mpyfxus,      fnx_mpyfxus     },  /* 0x45 */
    { fnx_mpy16,        fnx_mpy16       },  /* 0x46 */
    { fnx_isqrt,        fnx_isqrtfx     },  /* 0x47 */

    { fnx_aal,          fnx_aal         },  /* 0x48 */
    { fnx_aah,          fnx_aah         },  /* 0x49 */
    { fnx_divs,         fnx_divs        },  /* 0x4A */
    { fnx_divfxs,       fnx_divfxs      },  /* 0x4B */
    { fnx_divu,         fnx_divu        },  /* 0x4C */
    { fnx_divfxu,       fnx_divfxu      },  /* 0x4D */
#define OPX_DIV32S 0x4E
#define OPX_DIV32U 0x4F
    { NULL,             NULL            },  // DIV32S 0x4E
    { NULL,             NULL            },  // DIV32U 0x4F

    { fnx_adds,         fnx_addu        },  /* 0x50 */
    { fnx_addh,         fnx_addm        },  /* 0x51 */
    { fnx_subs,         fnx_subs        },  /* 0x52 */
    { fnx_subu,         fnx_subu        },  /* 0x53 */
    { fnx_subm,         fnx_subm        },  /* 0x54 */
    { fnx_subh,         fnx_subh        },  /* 0x55 */
    { fnx_dmov,         fnx_dmov        },  /* 0x56 */
    { fnx_addsub,       fnx_addsub      },  /* 0x57 */

    { fnx_abcd,         fnx_abcdl       },  /* 0x58 */
    { fnx_abcdh,        fnx_abcdm       },  /* 0x59 */
    { fnx_sbcd,         fnx_sbcd        },  /* 0x5A */
    { fnx_sbcdl,        fnx_sbcdl       },  /* 0x5B */
    { fnx_sbcdm,        fnx_sbcdm       },  /* 0x5C */
    { fnx_sbcdh,        fnx_sbcdh       },  /* 0x5D */
    { fnx_i2bcd,        fnx_i2bcd       },  /* 0x5E */
    { fnx_bcd2i,        fnx_bcd2i       },  /* 0x5F */

    { fnx_cmpeqa,       fnx_cmpnea      },  /* 0x60 */
};

#define OPX_MAX (sizeof(fnx_tbl) / sizeof(fnx_tbl[0]))

/* ------------------------------------------------------------------------ */
/*  DIV32S:  Signed 32-bit by 16-bit divide.                                */
/*                                                                          */
/*      s1hi:s1lo   32-bit signed dividend.                                 */
/*      s2          16-bit signed divisor.                                  */
/*      dl, dh      Quotient and remainder.  Both are set to 0x7FFF if the  */
/*                  divisor is zero or the quotient does not fit in a       */
/*                  signed 16-bit value.                                    */
/* ------------------------------------------------------------------------ */
LOCAL INLINE void fnx_div32s(const uint16_t s1lo, const uint16_t s1hi,
                             const uint16_t s2,
                             uint16_t *const dl, uint16_t *const dh)
{
    const int32_t ss1hi = as_i32(s1hi);
    const int32_t i1    = SLS32(ss1hi, 16) + s1lo;
    const int32_t i2    = as_i32(s2);

    *dl = 0x7FFFu;
    *dh = 0x7FFFu;

    if (i2)
    {
        /* Divide in 64 bits:  INT32_MIN / -1 overflows int32_t, which is   */
        /* undefined behavior and traps on many hosts.  In 64 bits it       */
        /* becomes an ordinary out-of-range quotient that saturates below.  */
        const int64_t q = (int64_t)i1 / i2;
        const int64_t r = (int64_t)i1 % i2;

        if (q >= -0x8000 && q <= 0x7FFF)
        {
            *dl = (uint16_t)q;
            *dh = (uint16_t)r;
        }
    }
}

/* ------------------------------------------------------------------------ */
/*  DIV32U:  Unsigned 32-bit by 16-bit divide.                              */
/*                                                                          */
/*      s1hi:s1lo   32-bit unsigned dividend.                               */
/*      s2          16-bit unsigned divisor.                                */
/*      dl, dh      Quotient and remainder.  Both are set to 0xFFFF if the  */
/*                  divisor is zero or the quotient exceeds 16 bits.        */
/* ------------------------------------------------------------------------ */
LOCAL INLINE void fnx_div32u(const uint16_t s1lo, const uint16_t s1hi,
                             const uint16_t s2,
                             uint16_t *const dl, uint16_t *const dh)
{
    const uint32_t u1 = (as_u32(s1hi) << 16) + s1lo;
    const uint32_t u2 = as_u32(s2);

    *dl = 0xFFFFu;
    *dh = 0xFFFFu;

    if (u2)
    {
        const uint32_t q = u1 / u2;
        const uint32_t r = u1 % u2;

        if (q <= 0xFFFFu)
        {
            *dl = q;
            *dh = r;
        }
    }
}

/* Encodings of the src2 type field (amode bits 2..1). */
#define S2_BAD  (0)
#define S2_REG  (1)
#define S2_PCST (2)
#define S2_NCST (3)

/* Encodings of the src1 type field (amode bit 3). */
#define S1_REG  (0)
#define S1_XREG (1)

/* ======================================================================== */
/*  FN_EXT_ISA   Execute the extended ISA.                                  */
/*                                                                          */
/*  Decodes the extended opcode from the instruction record, fetches the    */
/*  two source operands, and dispatches through fnx_tbl (or to the DIV32    */
/*  handlers).  Results land in the X register named by xreg0 and the       */
/*  next X register (modulo 16).  R7 is advanced by two in every path,      */
/*  cp1600->intr is cleared, and the function always returns 9.             */
/* ======================================================================== */
int fn_ext_isa(const instr_t *instr, cp1600_t *cp1600)
{
    const uint16_t opcode = instr->opcode.decoded.imm0 >> 8;
    const uint8_t  amode  = instr->opcode.decoded.amode;
    const uint8_t  s_bit  = instr->opcode.decoded.amode & 1;
    const uint8_t  s1type = (instr->opcode.decoded.amode >> 3) & 1;
    const uint8_t  s2type = (instr->opcode.decoded.amode >> 1) & 3;
    const uint8_t  reg0   = instr->opcode.decoded.reg0;
    const uint8_t  reg1   = instr->opcode.decoded.reg1;
    const uint16_t imm1   = instr->opcode.decoded.imm1;
    const uint8_t  xdst   = instr->opcode.decoded.xreg0;
    uint16_t src1, src1h, src2, *dst_hi, *dst_lo;

    cp1600->r[7]++;
    cp1600->intr = 0;

    /* -------------------------------------------------------------------- */
    /*  Validity checks.  If these fail, the instruction behaves as a NOP.  */
    /* -------------------------------------------------------------------- */
    if (amode >= 0x20 || amode < 0x02 || opcode >= OPX_MAX ||
        s2type == S2_BAD)
        goto leave;

    /* -------------------------------------------------------------------- */
    /*  Bind pointers for dst_hi/dst_lo                                     */
    /* -------------------------------------------------------------------- */
    dst_lo = &( cp1600->xr[xdst] );
    dst_hi = &( cp1600->xr[15 & (1 + xdst)] );

    /* -------------------------------------------------------------------- */
    /*  Update the PV register.                                             */
    /* -------------------------------------------------------------------- */
    CP1600_WR(cp1600, 0x9F8D, *dst_lo);

    /* -------------------------------------------------------------------- */
    /*  Get initial values for src1, src2.                                  */
    /* -------------------------------------------------------------------- */
    src1 = s1type == S1_REG ? cp1600->r[reg0]   : cp1600->xr[reg0];
    src2 = s2type == S2_REG ? cp1600->xr[reg1]  : imm1;

    /* -------------------------------------------------------------------- */
    /*  DIV32S / DIV32U get special handling.                               */
    /* -------------------------------------------------------------------- */
    if (opcode == OPX_DIV32S || opcode == OPX_DIV32U)
        goto div32;

    /* -------------------------------------------------------------------- */
    /*  Swap src1/src2 if S bit is set in opcode.                           */
    /* -------------------------------------------------------------------- */
    if (s_bit)
    {
        int temp = src1;
        src1 = src2;
        src2 = temp;
    }

    /* -------------------------------------------------------------------- */
    /*  Dispatch to the particular instruction.                             */
    /* -------------------------------------------------------------------- */
    fnx_tbl[opcode][s_bit](src1, src2, dst_lo, dst_hi);

leave:
    cp1600->r[7]++;
    return 9;   /* Does this take 9 or 10 cycles? */

    /* -------------------------------------------------------------------- */
    /*  Handle DIV32S/DIV32U specially.  They have a 32-bit src1 argument   */
    /*  and so for now that argument must come from a register.  If that    */
    /*  argument is an R-register, then deposit it in the upper 16 bits.    */
    /*  If that argument is an X-register, fetch a 32-bit register pair.    */
    /*  With the S bit set, these opcodes behave as a NOP.                  */
    /* -------------------------------------------------------------------- */
div32:
    if (s_bit)
        goto leave;

    if (s1type == S1_REG)
    {
        src1h = src1;
        src1  = 0;
    } else
    {
        src1h = cp1600->xr[15 & (1 + reg0)];
    }

    if (opcode == OPX_DIV32S)
        fnx_div32s(src1, src1h, src2, dst_lo, dst_hi);
    else
        fnx_div32u(src1, src1h, src2, dst_lo, dst_hi);

    cp1600->r[7]++;
    return 9;
}