uint_type a = original_1; uint_type b = original_2; b = -b; uint_type result = a - b;
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; uint16 mask = 1; while(mask != 0) { uint16 nextmask = mask << 1; if(a & mask) // If A has that bit set? { // If result also has that bit set, set carry (next bit in result) if(result & mask) result ^= nextmask; // This could also be |= instead of ^= . // Flip this bit in result result ^= mask; } if(b & mask) // If B has that bit set? { // If result also has that bit set, set carry (next bit in result) if(result & mask) result ^= nextmask; // This could also be |= instead of ^= . // Flip this bit in result result ^= mask; } mask = nextmask; }The following version does potentially fewer iterations by applying carry calculations to all bits simultaneously. Requirements:
uint_type a = original_1; uint_type b = original_2; uint_type carry = a & b; uint_type result = a ^ b; while(carry != 0) { // If you need the mathematical carry from addition, // check the overflow from this shift. uint_type shiftedcarry = carry << 1; carry = result & shiftedcarry; result = result ^ shiftedcarry; }
uint_type a = original_1; uint_type b = original_2; uint_type result = a + b; bool carry = result < a; // result < b works too.
uint16 a = original_1; uint16 b = original_2; uint32 temp = a + b; // Alternative 1: uint16 tempc[] = temp; // Treat temp as two 16-bit components uint16 result = tempc[0]; // Take low word bool carry = tempc[1]; // Take high word // Alternative 2: Bit-wise operations bool carry = temp >> 16; // Bit-wise logical shift right uint16 result = temp & 0xFFFF; // Bit-wise AND operation
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; uint16 mask = 1; bool carry = false; while(mask != 0) { bool bit0 = a & mask; bool bit1 = b & mask; if(carry) { if(bit0 == bit1) result |= mask; carry = bit0 | bit1; } else { if(bit0 != bit1) result |= mask; carry = bit0 & bit1; } mask = mask << 1; }
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; bool math_carry = false; for(int bit=0; bit<16; ++bit) { bool bit0 = a&1; a >>= 1; // Shift-to-right, get carry bool bit1 = b&1; b >>= 1; // Shift-to-right, get carry if(math_carry) { bool shift_carry = bit0 == bit1; // Shift-to-right with carry: sets msb=carry result >>= 1; if(shift_carry) result |= 0x80; if(bit0) math_carry = true; else if(bit1) math_carry = true; else math_carry = false; // ^ Same as: math_carry = (bit0 || bit1); } else { bool shift_carry = bit0 != bit1; // Shift-to-right with carry: sets msb=carry result >>= 1; if(shift_carry) result |= 0x80; math_carry = false; if(bit0) if(bit1) math_carry = true; // ^ Same as: math_carry = (bit0 && bit1); } }Alternatively, as a state machine:
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; bool math_carry = false; for(int bit=0; bit<16; ++bit) switch(math_carry) { case false: { bool bit0 = a&1; a>>=1; // Rotate-right A, get carry if(bit0) goto sum1_x; } sum0_x: { bool bit1 = b&1; b>>=1; // Rotate-right B, get carry if(bit1) goto sum1; } sum0: result >>= 1; // Clear math_carry, rotate-right result math_carry=false; continue; sum1_x: { bool bit1 = b&1; b>>=1; // Rotate-right B, get carry if(bit1) goto sum2; } sum1: result >>= 1; result |= 0x8000; // Set carry, rotate-right result math_carry=false; continue; sum2: result >>= 1; // Clear math_carry, rotate-right result math_carry=true; continue; case true: { bool bit0 = a&1; a>>=1; // Rotate-right A, get carry if(bit0) goto sum2_x; } goto sum1_x; sum2_x: { bool bit1 = b&1; b>>=1; // Rotate-right B, get carry if(bit1) goto sum3; } goto sum2; sum3: result >>= 1; result |= 0x8000; // Set carry, rotate-right result math_carry=true; continue; }Which would be equivalent to this 6502 code (8-bit operands), assuming the 6502 didn't already have an addition operation:
; Inputs: a_operand = operand 1 ; b_operand = operand 2 ; C = carry ; Outputs: ; result = operand 1 + operand 2 + carry ; C = carry from addition ; A = 8 ; a_operand = 0 ; b_operand = 0 ; bits_remaining = 0 lda #8 sta bits_remaining bcs next_bit_carryset bcc next_bit_carryclear sum0_x: lsr b_operand ; Sum=0. Get next LSB from B_operand ; Arrived here when math_carry=0, A=0, B=x. (sum = 0 or 1) ; Put B-bit (whatever it is) in result, and keep math_carry=false. ror result ; Rotate-right result ;jmp next_bit_carryclear ; Unconditional jump. next_bit_carryclear: dec bits_remaining ; while(--bits_remaining > 0) beq done_add_clc ; { ;bne carry_false ;carry_false: lsr a_operand ; Sum=0. Get next LSB from A_operand bcc sum0_x ; Goto sum1_x if set, sum0_x if clear sum1_x: lsr b_operand ; Sum=1. Get next LSB from B_operand bcs sum2a ; Goto sum2a if set, sum1 if clear ;bcc sum1 ;sum1: ; Arrived here when math_carry=0, A=1, B=0 (sum=1) ; Arrived here when math_carry=1, A=0, B=0 (sum=1) sec ; Set carry ror result ; Rotate-right result bne next_bit_carryclear ; Unconditional jump. sum2a: ; Arrived here when math_carry=0, A=1, B=1 (sum=2) ; Arrived here when math_carry=1, A=0, B=1 (sum=2) clc ; Clear carry ror result ; Rotate-right result bpl next_bit_carryset ; Unconditional jump. carry_true: lsr a_operand ; Sum=1. Get next LSB from A_operand bcc sum1_x ; Goto sum2_x if set, sum1_x if clear ;bcs sum2_x ;sum2_x: lsr b_operand ; Sum=2. Get next LSB from B_operand ; Arrived here when math_carry=1, A=1, B=x. (sum= 2 or 3) ; Put B-bit (whatever it is) in result, and keep math_carry=true. ror result ; Rotate-right result ;jmp next_bit_carryset ; Unconditional jump. next_bit_carryset: dec bits_remaining ; if(!--bits_remaining) bne carry_true ;beq done_add_sec ; ;done_add_sec: sec bcs done_add done_add_clc: clc ;bcc done_add done_add: ; }
uint_type a[] = original_1[]; uint_type b[] = original_2[]; uint_type result[]; bool carry = false; // Clear carry for(n = 0; n < 4; ++n) // example if you're doing 32-bit addition using 8-bit integers { result[n] = a[n] + b[n] + carry; carry = /* save carry from this calculation*/; }
uint_type a = original_1; uint_type b = original_2; b = -b; uint_type result = a + b;
uint_type a = original_1; uint_type b = original_2; uint_type result = a - b; bool carry = a < result;
int_type a = original; bool negative = a < 0;or by testing the sign bit:
uint16 a = original; bool negative = a & 0x8000; // The highest-order bit of 16-bit integerTesting the sign bit can also be done on some platforms with a left-shift operation, if the operation produces a carry.
uint16 a = original; a = a << 1; // Binary left-shift by 1 bool carry = /* save carry from this calculation*/; bool negative = carry;And some platforms, such as the 6502, produce a is-negative flag simply by loading the value.
lda value ; Load the highest-order byte of the value. ; The N flag now contains the sign bit. bmi got_negative ;branch if N set bpl got_positive ;branch if N clearSimilarly on i386:
mov eax, value ; Load the highest-order word of the value. or eax, eax ; Test its bits ; test eax, eax ; Alternative way of testing the bits js got_negative ; branch if SF set jns got_positive ; branch if SF clear
uint_type a = original; uint_type result = ~a; // NOT operation result = result + 1;Alternatively, you can implement it by subtracting from zero: Requirements:
uint_type a = original; uint_type result = 0 - a;
uint16 a = original; uint16 result = a ^ 0xFFFF; // 0xFFFF has 16 bits
uint16 a = original; uint16 result = 0xFFFF - a; // 0xFFFF has 16 bits
uint_type a = original; uint_type result = -a; result = result - 1;
uint_type a = original_1; uint_type b = original_2; a = ~a; // Logical NOT b = ~b; // Logical NOT uint_type result = a | b; // Logical OR result = ~result; // Invert result
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; for(n = 0; n < 16; ++n) { // Test whether the bit is set in both operands if(a & (1 << n)) // Test whether bit n is set in a if(b & (1 << n)) // Test whether bit n is set in b result |= (1 << n); // Set bit N in target value }Alternative:
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; for(uint16 mask = 1; mask != 0; mask <<= 1) { // Test whether the bit is set in both operands if(a & mask) // Test A and mask have common bits if(b & mask) // Test B and mask have common bits result = result + mask; }
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; uint16 mask = 1; for(int bit=0; bit<16; ++bit) { if((a % 2) != 0 && (b % 2) != 0) result = result + mask; a = a / 2; // Integer division, round towards zero b = b / 2; // Integer division, round towards zero mask = mask + mask; // Multiply mask by 2 }
uint_type a = original_1; uint_type b = original_2; a = ~a; // Logical NOT b = ~b; // Logical NOT uint_type result = a & b; // Logical AND result = ~result; // Invert result
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; for(n = 0; n < 16; ++n) { // Test whether the bit is set in either operand if(a & (1 << n)) // Test whether bit n is set in a result |= (1 << n); // Set bit N in target value if(b & (1 << n)) // Test whether bit n is set in b result |= (1 << n); // Set bit N in target value }Alternative:
uint16 a = original_1; uint16 b = original_2; uint16 result = 0xFFFF; for(uint16 mask = 1; mask != 0; mask <<= 1) { // Deduct bit, if it's CLEAR in both operands if(!(a & mask)) // Test A and mask don't have common bits if(!(b & mask)) // Test B and mask don't have common bits result = result - mask; }
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; uint16 mask = 1; for(int bit=0; bit<16; ++bit) { if((a % 2) != 0 || (b % 2) != 0) result = result + mask; a = a / 2; // Integer division, round towards zero b = b / 2; // Integer division, round towards zero mask = mask + mask; // Multiply mask by 2 }
uint_type a = original_1; uint_type b = original_2; uint_type nota = ~a; // Logical NOT uint_type notb = ~b; // Logical NOT uint_type result = (a & notb) | (b & nota);a ^ b = (a | b) &~ (a & b)
uint_type a = original_1; uint_type b = original_2; uint_type result = (a | b) & ~(a & b);
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; for(n = 0; n < 16; ++n) { // Test whether the bit is set in a but not b if(a & (1 << n)) // Test whether bit n is set in a if(!(b & (1 << n))) // Test whether bit n is not set in b result |= (1 << n); // Set bit N in target value // Test whether the bit is set in b but not a if(b & (1 << n)) // Test whether bit n is set in b if(!(a & (1 << n))) // Test whether bit n is not set in a result |= (1 << n); // Set bit N in target value }Alternative:
uint16 a = original_1; uint16 b = original_2; uint16 result = 0xFFFF; for(uint16 mask = 1; mask != 0; mask <<= 1) { // Deduct bit, if it's CLEAR in both operands if(!(a & mask)) // Test A and mask don't have common bits if(!(b & mask)) // Test B and mask don't have common bits result = result - mask; // Deduct bit, if it's SET in both operands if(a & mask) // Test A and mask have common bits if(b & mask) // Test B and mask have common bits result = result - mask; }
uint16 a = original_1; uint16 b = original_2; uint16 result = 0; uint16 mask = 1; for(int bit=0; bit<16; ++bit) { if(((a % 2) + (b % 2)) == 1) result = result + mask; a = a / 2; // Integer division, round towards zero b = b / 2; // Integer division, round towards zero mask = mask + mask; // Multiply mask by 2 }
uint_type a = original; uint_type result = a + a; // This result may also produce a carry. It's often needed.
uint8 a[] = original[]; bool carry = false; // Clear carry for(n = 0; n < 4; ++n) // example if you're doing 32-bit shift using 8-bit integers { bool new_carry = (a[n] & 0x80) ? 1 : 0; // Check the highest-order bit result[n] = (a[n] << 1) | carry; carry = new_carry; /* save carry from this calculation*/ }
uint_type a = original; uint_type result = a / 2;Of course, often that operation is not available, so...
uint16 a = original; uint_type amount = shift_amount; amount = 16-amount; // Invert the shift amount uint32 temp = original << amount; // Shift LEFT uint16 tempc[] = temp; // Treat temp as two 16-bit components uint16 result = tempc[1]; // Take high word
uint8 a[] = original[]; bool carry = false; // Clear carry for(n = 0; n < 4; ++n) // example if you're doing 32-bit shift using 8-bit integers { bool new_carry = a[n] & 1; // Check the lowest-order bit result[n] = (a[n] >> 1) | (carry ? 0x80 : 0x00); carry = new_carry; /* save carry from this calculation*/ }
uint16 a = original; uint16 result = a >> 1; result = result & 0x7FFF; // Keep all bits except the topmost one.Generalized to shifting by any amount: Requirements:
uint16 a = original; uint_type amount = shift_amount; uint16 result = a >> amount; uint16 mask = (1 << (16-amount)) - 1; // Change 16 to the bit-width of result result = result & mask;
uint_type a = original; uint_type result = 0; // Repeat these lines for powers of two, according // to the upper limit of known value of A. if(a >=64) { result = result + 32; a = a - 64; } if(a >=32) { result = result + 16; a = a - 32; } if(a >=16) { result = result + 8; a = a - 16; } if(a >= 8) { result = result + 4; a = a - 8; } if(a >= 4) { result = result + 2; a = a - 4; } if(a >= 2) { result = result + 1; a = a - 2; }
cmp #0 ; Compare value to zero. Sets carry if it's negative. ror a ; Shift to the right. ; Carry becomes the new highest bit, ; lowest bit becomes the new carry.
uint16 a = original; uint16 result = a >> 1; // Logical bit-shift to the right // Copy the sign bit from the original. result = result | (a & 0x8000); // ^ This can also be done with a bit-test & bit-set operation // if it is cheaper, or if OR and AND operations are not available.
uint16 a = original; uint_type amount = shift_amount; uint16 result = a >> amount; if(a & 0x8000) // If the number was signed { // This shift depends on leftshiting discarding // the excess bits: uint16 signbits = 0xFFFF << (16-amount); // Alternative way: //uint16 signbits = ~((1 << (16-amount)) - 1); // Patch in the sign bits: result = result | signbits; }
uint_type a = original_1; uint_type b = original_2; uint_type result = 0; while(a != 0) { if(a & 1) // If the lowest order bit is set in A? { result = result + b; } a = a >> 1; // Note: This must set highest order bit ZERO. It must not copy the sign bit. b = b + b; // Alternatively, left-shift by 1 bit }If you need to do multiplication with larger integer sizes, just implement this algorithm using additions and shifts for larger integer sizes.
uint_type a = original_1; uint_type b = original_2; uint_type stack[7]; stack[0] = b; b = b+b; // b*1 stack[1] = b; b = b+b; // b*2 stack[2] = b; b = b+b; // b*4 stack[3] = b; b = b+b; // b*8 stack[4] = b; b = b+b; // b*16 stack[5] = b; b = b+b; // b*32 stack[6] = b; // b*64 uint_type result = 0; // Repeat these lines for powers of two, according // to the upper limit of known value of A. if(a >=64) { result = result + stack[6]; a = a - 64; } else { /* ignore stack[6] */ } if(a >=32) { result = result + stack[5]; a = a - 32; } else { /* ignore stack[5] */ } if(a >=16) { result = result + stack[4]; a = a - 16; } else { /* ignore stack[4] */ } if(a >= 8) { result = result + stack[3]; a = a - 8; } else { /* ignore stack[3] */ } if(a >= 4) { result = result + stack[2]; a = a - 4; } else { /* ignore stack[2] */ } if(a >= 2) { result = result + stack[1]; a = a - 2; } else { /* ignore stack[1] */ } if(a >= 1) { result = result + stack[0]; } else { /* ignore stack[0] */ }This algorithm is faster than a raw addition-loop for all but the smallest multipliers. If no extra memory or a stack is available, then an addition loop is the only option. Requirements:
uint_type a = original_1; uint_type b = original_2; /* Optional: Ensure minimal number of loops by swapping operands if necessary */ if(a > b) { uint_type temp = a; a = b; b = temp; } uint_type result = 0; while(a > 0) { result = result + b; a = a - 1; }
15 wx EA=x*z GF=w*z *74 *yz JH=x*y LK=w*y ---- ---- = 20 = EA B = E+F+H + 4 + GF C = G+J + carry from B + 35 + JH D = L + carry from C + 7 +LK ---- ---- 1110 DCBAExample in x86_64 assembler, producing 128-bit * 128-bit = 256-bit multiplication with 64-bit registers:
mov rsi, 0 ; use a zero as a constant sometimes. ; DCBA now contains ????. ; store 00EA: mov rax, x ; low 64 bits of operand 1 mul z ; low 64 bits of operand 2 mov A, rax ; store low 64 bits of result (A) mov rbx, rdx ; store high 64 bits of result (E) mov rcx, rsi ; clear C and D mov rdi, rsi ; add 0GF0: mov rax, x ; low 64 bits of operand 1 mul y ; high 64 bits of operand 2 add rbx, rax ; store low 64 bits of result (F) adc rcx, rdx ; store high 64 bits of result (G) ;adc rdi, rsi ; not needed here, EA+GF0 never overflows to fourth digit ; add 0JH0: mov rax, w ; high 64 bits of operand 1 mul z ; low 64 bits of operand 2 add rbx, rax ; store low 64 bits of result (H) adc rcx, rdx ; store high 64 bits of result (J) adc rdi, rsi ; add LK00: mov rax, w ; high 64 bits of operand 1 mul y ; high 64 bits of operand 2 add rcx, rax ; store low 64 bits of result (K) adc rdi, rdx ; store high 64 bits of result (L) ; store to actual DCB mov B, rbx mov C, rcx mov D, rdiSee also:
int_type a = original_1; int_type b = original_2; bool sign = false; if(a < 0) { sign = !sign; // Toggle the signed flag a = -a; // Negate a } if(b < 0) { sign = !sign; // Toggle the signed flag b = -b; // Negate b } // a and b are both positive now int_type result = a * b; // Do unsigned multiplication if(sign) { // Negate the result result = -result; }
if(divisor == 0) { // TODO: Do whatever should happen when dividing by zero. // Don't run the rest of this algorithm if divisor = 0. return; } uint_type scaled_divisor = divisor; // The right-hand side of division uint_type remain = dividend; // The left-hand side of division, i.e. what is being divided uint_type result = 0; uint_type multiple = 1; while(scaled_divisor < dividend) // Alternative: while(!(scaled_divisor & 0x8000)) // For 16-bit, test highest order bit. // Alternative: while(not_signed(scaled_divisor)) // Same as above. { scaled_divisor = scaled_divisor + scaled_divisor; // Multiply by two. multiple = multiple + multiple; // Multiply by two. // You can also use binary shift-by-left here (i.e. multiple = multiple << 1). } do { if(remain >= scaled_divisor) { remain = remain - scaled_divisor; result = result + multiple; } scaled_divisor = scaled_divisor >> 1; // Divide by two. multiple = multiple >> 1; } while(multiple != 0); // Now: result = division result (quotient) // remain = division remainder (modulo)If you need to do division or modulo with larger integer sizes, just implement this algorithm using additions and shifts for larger integer sizes.
uint_type remain = dividend; // The left-hand side of division, i.e. what is being divided uint_type result = 0; while(remain >= divisor) { remain = remain - divisor; result = result + 1; }This is how you might do divisions, if you had no bit-shift operations available. You simply measure how many times the divisor can be subtracted from the dividend before it becomes negative. However, suppose your dividend is 1500 and your divisor is 2. This algorithm will loop for 750 iterations. How to improve it? Well, you could first divide by sixteen, i.e. two times eight.
uint_type remain = dividend; uint_type result = 0; while(remain >= divisor*8) { remain = remain - divisor*8; result = result + 8; } while(remain >= divisor*1) { remain = remain - divisor*1; result = result + 1; }Now this algorithm will loop for 99 iterations, and produces the same correct result. But we could do better! How about dividing first by 128? And maybe we could subdivide that 16 into 4 and 8. The first algorithm automates this subdivision process, doing the compare-and-subtract test for all power-of-two multiples of the divisor, converging towards the O(log2(n)) goal. It begins with the largest multiple of the divisor that is not larger than the dividend.
uint_type remain = dividend; // The left-hand side of division, i.e. what is being divided uint_type scaled_divisor = divisor; // The right-hand side of division uint_type result = 0; uint_type multiple = 1; // TODO: Check for zero divisor stack_push(0); while(scaled_divisor < dividend) { stack_push(multiple); stack_push(scaled_divisor); scaled_divisor = scaled_divisor + scaled_divisor; // Multiply by two. multiple = multiple + multiple; // Multiply by two. } while( (scaled_divisor = stack_pop()) != 0 ) { multiple = stack_pop(); if(remain >= scaled_divisor) { remain = remain - scaled_divisor; result = result + multiple; } } // Now: result = division result (quotient) // remain = division remainder (modulo)
int_type a = original_1; int_type b = original_2; bool sign = false; if(a < 0) { sign = !sign; // Toggle the signed flag a = -a; // Negate a } if(b < 0) { sign = !sign; // Toggle the signed flag b = -b; // Negate b } // a and b are both positive now int_type result = a / b; // Do unsigned division if(sign) { // Negate the result result = -result; }
int_type a = original_1; int_type b = original_2; bool sign = false; if(a < 0) { sign = !sign; // Toggle the signed flag a = -a; // Negate a } if(b < 0) { // Exception: Do NOT toggle the signed flag. b = -b; // Negate b } // a and b are both positive now int_type result = a % b; // Do unsigned modulo if(sign) { // Negate the result result = -result; }
uint_type a = original_exponent; int_type b = original_value; int_type result = 1; while(a > 1) { if(a & 1) // A is odd? { result = result * b; } b = b * b; a >>= 1; // right-shift, i.e. divide by 2, round down } if(a > 0) { result = result * b; }
uint_type a = original_exponent; int_type b = original_value; int_type stack[4]; stack[0] = b; b = b*b; // b^1 stack[1] = b; b = b*b; // b^2 stack[2] = b; b = b*b; // b^4 stack[3] = b; // b^8 int_type result = 1; // Repeat these lines for powers of two, according // to the upper limit of known value of A. if(a >= 8) { result = result * stack[3]; a = a - 8; } else { /* ignore stack[3] */ } if(a >= 4) { result = result * stack[2]; a = a - 4; } else { /* ignore stack[2] */ } if(a >= 2) { result = result * stack[1]; a = a - 2; } else { /* ignore stack[1] */ } if(a >= 1) { result = result * stack[0]; } else { /* ignore stack[0] */ }This algorithm is faster than a multiplication-loop for exponents larger than 4. It is not necessary to use the extra memory or stack or temporary variables, but then you would need to recalculate the same values many times. It is still more efficient than a multiplication loop if the multiplier is large enough and has few bits set.
uint_type a = original_exponent; int_type b = original_value; int_type result = 1; // Repeat these lines for powers of two, according // to the upper limit of known value of A. if(a >= 8) { int_type tmp=b; b=b*b; b=b*b; b=b*b; result = result * tmp; a = a - 8; } if(a >= 4) { int_type tmp=b; b=b*b; b=b*b; result = result * tmp; a = a - 4; } if(a >= 2) { int_type tmp=b; b=b*b; result = result * tmp; a = a - 2; } if(a >= 1) { result = result * b; }A multiplication loop is the simplest and slowest option. Requirements:
uint_type a = original_exponent; uint_type b = original_value; uint_type result = 1; while(a > 0) { result = result * b; a = a - 1; }
result = b // b b = b*b // b^2 result = result * b // b^3 (b * b^2) b = b*b // b^4 result = result * b // b^7 (b^3 * b^4) b = b*b // b^8 result = result * b // b^15 (b^7 * b^8)But n^15 can in fact be done with just five multiplications:
result1 = b // b result2 = result1*result1 // b^2 result3 = result1*result2 // b^3 result6 = result3*result3 // b^6 result9 = result6*result3 // b^9 result15 = result9*result6 // b^15This works by recursively subdividing the exponent into two parts, and is done automatically by e.g. GCC. The GCC source code gives this reference for the algorithm:
/* Expand power operator to optimal multiplications when a value is raised to an constant integer n. See section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", 3rd Edition, 1998. */It features a 256-element lookup table. For exponent n, this table gives value i = table[n]. It means the following: x^n can be calculated as x^i * x^(n-i). You can find the table in the GCC source code (search for “powi_table”), or in that book, or alternatively you can use this C++ program to generate the table in whichever size you desire: http://bisqwit.iki.fi/jkp/powi-table-generator.cc The first 16 values of that table are: 0, 1, 1, 2, 2, 3, 3, 4, 4, 6, 5, 6, 6, 10, 7, and 9. For exponent 15, this means x^9 * x^6. For exponent 9, this means x^6 * x^3. And so on.
result = !(a != b);Less-than operation:
result = !(a < b) && !(b < a);Subtraction and compare to zero:
result = a-b; // Zero if equalXOR and compare to zero:
result = a^b; // Zero if equal
result = !(a == b);Less-than operation:
result = a < b || b < a;Subtraction and compare to zero:
result = a-b; // Non-zero if equalXOR and compare to zero:
result = a^b; // Non-zero if equal
result = !(a <= b);Less-than operation:
result = b < a;
result = !(a < b);
result = !(a >= b);
result = !(a > b);Less-than operation:
result = !(b < a);
Operation | Reduction | Other equivalent functions |
---|---|---|
asinh(x) | log(x + sqrt(x² + 1)) | = −asinh(−x) = −1 i* asin(−1i*x) = 1 i*asin(1i*x) |
acosh(x) | log(x + sqrt(x² − 1)) | = 1 i*acos(x) when tan(arg(x)) > 0 = −1 i*acos(x) when tan(arg(x)) < 0 |
asin(x) | 1i *log(1i*(x + sqrt(x²−1) | = −asin(−x) = −1 i*asinh(−1i*x) = 1 i*asinh(1i*x) |
acos(x) | −1 i *log(x + sqrt(x²−1)) or −1i*log(x − sqrt(²−1)) | = 1 i*acosh(x) when tan(arg(x)) < 0 = −1 i*acosh(x) when tan(arg(x)) > 0 |
atanh(x) | log( (1+x) / (1−x)) / 2 | = i * atan(−1i * x) = sometimes also −1i*atan(1i*x) |
atan(x) | −0.5 i*log((1+1i*x) / (1−1i*x)) | = −1 i*atanh(1i*x) = sometimes also 1 i*atanh(−1i*x) |
sinh(x) | 0.5 * (exp(x) − exp(−x)) | = −sinh(−x) = 1i*sin(−1i*x) = −1i*sin(1i*x) |
cosh(x) | 0.5 *(exp(x) + exp(−x)) | = cosh(−x) = cos(1i*x) = cos(−1i*x) |
sin(x) | 0.5 i*(exp(x*−1i) − exp(x*1i)) | = −sin(−x) = 1i*sinh(−1i *x) = −1i*sinh(1i*x) |
cos(x) | 0.5 *(exp(x*−1i) − exp(x*1i)) | = cos(−x) = cosh(1i*x) = cosh(−1i*x) |
tanh(x) | sinh(x) / cosh(x) = −1 * (1−exp( 2*x)) / (1+exp( 2*x)) | = −tanh(−x) = tan(1i*x) = −tan(−1i*x) |
tan(x) | sin(x) / cos(x) = 1i * (1−exp(2i*x)) / (1+exp(2i*x)) | = −tan(−x) = tanh(1i*x) = −tanh(−1i*x) |
sqrt(x) | exp(0.5 * log(x)) | = pow(x,0.5) |
exp(x) | = pow(e, x) (where e is euler's constant) | |
pow(x,y) | exp(y * log(x)) | Note: If x is negative, log(x) will produce complex numbers. |
log10(x) | log(x) / log(10) | |
x/y | = x * pow(y,-1) = x * exp(-log(y)) |
Operation | Equivalent function using real functions only | What happens in special cases |
---|---|---|
sin(x) | sin(Xr)*cosh(Xi) + i*cos(Xr)*sinh(Xi) | sin(Xr) if Xi=0 (duh) i*sinh(Xi) if Xr=0 |
cos(x) | cos(Xr)*cosh(Xi) − i*sin(Xr)*sinh(Xi) | cos(Xr) if Xi=0 cos(Xi) if Xr=0 |
sinh(x) | sinh(Xr)*cos(Xi) + i*cosh(Xr)*sin(Xi) | sinh(Xr) if Xi=0 i*sin(Xi) if Xr=0 |
cosh(x) | cosh(Xr)*cos(Xi) + i*sinh(Xr)*sin(Xi) | cosh(Xr) if Xi=0 cos(Xi) if Xr=0 |
tan(x) | (sin(2*Xr) + i*sinh(2*Xi)) / (cos(2*Xr) + cosh(2*Xi)) | tan(Xr) if Xi=0 i*tanh(Xi) if Xr=0 |
tanh(x) | (sinh(2*Xr) + i*sin(2*Xi)) / (cos(2*Xr) + cosh(2*Xi)) | tanh(Xr) if Xi=0 i*tan(Xi) if Xr=0 |
exp(x) | exp(Xr)*cos(Xi) + i*exp(Xr)*sin(Xi) | exp(Xr) if Xi=0 i*sin(Xi) if Xr=0 |
log(x) | log(abs(X)) + i * arg(X) | log(Xr) if Xi=0 and Xr>=0 log(-Xr) − πi if Xi=0 and Xr<0 (doesn't reduce into real) -iπ/2 if Xr=0 and Xi<0 iπ/2 if Xr=0 and Xi>0. |
abs(x) | sqrt(Xr² + Xi²) | Xr if Xi=0 Xi if Xr=0 |
arg(x) | atan2(Xi,Xr) | 0 if Xi=0 and Xr>0 π if Xi=0 and Xr<0 -iπ/2 if Xr=0 and Xi<0 iπ/2 if Xr=0 and Xi>0. |
atan(x) | 0.25i*log((Xr²+Xi²+2*Xi+1) / (Xr²+Xi² −2*Xi+1)) + 0.5 *(atan2(Xr,Xi+1)+atan2(Xr,1−Xi)) | atan(Xr) if Xi=0 |
atanh(x) | 0.25 *log((Xr²+Xi²+2*Xr+1) / (Xr²+Xi² −2*Xr+1)) + 0.5i *(atan2(Xi,Xr+1)+atan2(Xi,1−Xr)) | atanh(Xr) if Xi=0 |
x+y | Xr + i*Xi + Yr + i*Yi | |
x−y | Xr + i*Xi − Yr − i*Yi | |
x*y | (Xr*Yr − Xi*Yi) + i*(Xi*Yr + Xr*Yi) | i*i = −1 |
x/y | (Xr*Yr + Xi*Yi) / (Xr² + Yi²) + i*((Xi*Yr − Xr*Yi) / (Yr² + Yi²)) | |
sqrt(x) | sqrt((abs(x) + Xr) / 2) + i*sgn(Xi)*sqrt((abs(x) − Xr) / 2) where sgn(n) = -1, 0 or 1 depending on n's sign. |
Operation | Alternatives |
---|---|
sin(x) | −sin(−x) cos(π/2 − x) tan(x) * cos(x) Read: http://en.wikipedia.org/wiki/Trigonometric_function, http://en.wikipedia.org/wiki/Taylor_series |
cos(x) | cos(−x) sin(π/2 − x) sin(x) / tan(x) |
tan(x) | −tan(−x) sign(x) * sqrt(1 / cos(x)²-1) sin(x) / cos(x) |
asin(x) | −asin(−x) π/2 − acos(x) atan(x / sqrt(1 − x²)) |
acos(x) | π − acos(−x) π/2 − asin(x) π/2 − atan(x / sqrt(1 − x²)) asin(sqrt(1−x²)) if 0<=x<=1 |
atan(x) | −atan(−x) asin(x² / sqrt(x²+1)) 2 * atan((sqrt(1+x²) − 1) / x) 2 * atan(x / (1 + sqrt(1 + x²))) atan(y) + atan((x−y)/(1+x*y)) when sign(y)=sign(x) Read: http://en.wikipedia.org/wiki/Inverse_trigonometric_functions |
exp(x) | pow(e, x) cosh(x) + sinh(x) cosh(−x) − sinh(−x) pow(1 + x/y, y) when y approaches infinity exp(x−y) * exp(y) for any y Read: http://en.wikipedia.org/wiki/Exponential_function |
log(x) | log10(x) / log10(e) log2(x) / log2(e) asinh((x²+1) / (2*x)) acosh((x²−1) / (2*x)) 2*atanh((x−1) / (x+1)) log(x/y) + log(y) for any y>0 Read: http://en.wikipedia.org/wiki/Natural_logarithm |
log2(x) | log10(x) / log10(2) log(x) / log(2) |
log10(x) | log2(x) / log2(10) log(x) / log(10) |
pow(x,y) | 1 / pow(x,−y) exp(y*log(x)) if y is integer, a special algorithm may be applicable always pay attention to special cases |
sqrt(x) | pow(x,0.5) exp(0.5 * log(x)) Read: http://en.wikipedia.org/wiki/Methods_of_computing_square_roots, http://en.wikipedia.org/wiki/Newton%27s_method |
sinh(x) | −sinh(−x) exp(x) − cosh(x) cosh(x) − exp(−x) 2*sinh(x/2)*cosh(x/2) 1/2 * (exp(x) − exp(−x)) (exp(2*x)−1) / (2*exp(x)) tanh(x) * cosh(x) |
cosh(x) | cosh(−x) exp(x) − sinh(x) exp(−x) + sinh(x) 1/2 * (exp(x) + exp(−x)) (exp(2*x)+1) / (2*exp(x)) sinh(x) / tanh(x) |
tanh(x) | −tanh(−x) (exp(2*x)−1) / (exp(2*x)+1) sinh(x) / cosh(x) |
asinh(x) | log(x + sqrt(x² + 1)) 1/2 * acosh(2*x² + 1) |
acosh(x) | log(x + sqrt(x² − 1)) 1/2 * acosh(2*x² − 1) |
atanh(x) | 1/2 * log(x+1) − 1/2 * log(1−x) log( (1+x) / (1−x)) / 2 |
int_type a_exponent = original_1_exponent; int_type a_mantissa = original_1_mantissa; bool a_sign = original_1_sign; int_type b_exponent = original_2_exponent; int_type b_mantissa = original_2_mantissa; bool b_sign = original_2_sign; int_type result_exponent = original_1_exponent + original_2_exponent; int_type result_mantissa = original_1_mantissa * original_2_mantissa; bool result_sign = a_sign ^ b_sign; // XOR // After this, comes renormalization, which is the adjusting of // the exponent & mantissa until the mantissa is in proper range.
int_type a_exponent = original_1_exponent; int_type a_mantissa = original_1_mantissa; bool a_sign = original_1_sign; int_type b_exponent = original_2_exponent; int_type b_mantissa = original_2_mantissa; bool b_sign = original_2_sign; int_type result_exponent = original_1_exponent - original_2_exponent; int_type result_mantissa = original_1_mantissa / original_2_mantissa; bool result_sign = a_sign ^ b_sign; // XOR // After this, comes renormalization, which is the adjusting of // the exponent & mantissa until the mantissa is in proper range.