#include <cstdio>
#include <cstdint>
#include <cstdlib>

/* Declare integer datatypes for each number of bits */
typedef uint_least8_t  u8;  typedef int_least8_t  s8;
typedef uint_least16_t u16; typedef int_least16_t s16;
typedef uint_least32_t u32; typedef int_least32_t s32;
typedef uint_least64_t u64; typedef int_least64_t s64;

/* The following data structures provide templated access
 * of different data sizes within a 32-bit integer. */
template<unsigned size, typename T = void>
struct MemDataRef
{
    union { u32 value; T alt[4 / sizeof(T)]; } d;
    // Get a reference to the "size" item beginning
    // at byte position "ind" within the 32-bit word.
    // All requests are assumed to be properly aligned.
    T& reffun(unsigned ind) { return d.alt[ind >> (sizeof(T)==2)]; }
};
template<> struct MemDataRef<1,void>: public MemDataRef<0, u8>  { };
template<> struct MemDataRef<2,void>: public MemDataRef<0, u16> { };
template<> struct MemDataRef<4,void>: public MemDataRef<0, u32> { };

/* Declare facilities for detecting and dealing with different byteorder */
#define LITTLE_ENDIAN_HOST  1 /* 1=little endian */
#define LITTLE_ENDIAN_SLAVE 0 /* 0=big endian */
#define CROSS_ENDIAN (LITTLE_ENDIAN_HOST != LITTLE_ENDIAN_SLAVE)
static unsigned SwapBytes(unsigned value, unsigned size)
{
    if(size >= 2) value = ((value & 0xFF00FF00u) >> 8) | ((value & 0x00FF00FFu) << 8);
    if(size >= 4) value = (value >> 16) | (value << 16);
    return value;
}

/* Console I/O (note: there should be terminal emulation here, but
 * such implementation is skipped here, for brevity) */
# include <conio.h>
static struct
{
    bool Hit()            { return kbhit(); }
    unsigned Getc()       { int r = getch(); return r ? r : ' '; }
    void Putc(unsigned c) { putchar(c); fflush(stdout); }
} Console;

/* Declare all the devices that we emulate. */

// RAM device is very simple.
template<unsigned size_bytes>
class SDRAM
{
    u32 buffer[size_bytes / 4];
public:
    u32 Read(u32 index) const        { return buffer[index]; }
    void Write(u32 index, u32 value) { buffer[index] = value; }
};

// Xilinx UART lite (serial port), lite version
class XilinxUARTlite
{
    bool ints_enabled, tx_emptied_int_pending, overrun;
    struct { u8 len, pos, fifo[8]; } in; // Input queue
    // List of supported status and control bits
    enum { RXVALID=1, RXFULL=2, TXEMPTY=4, IE=16, OVERRUN=32, RST_RX=2 };
public:
    XilinxUARTlite(): ints_enabled(0), tx_emptied_int_pending(0), overrun(0), in{0,0,{0}} { }
    u32 Read(u32 index)
    {
        u32 result = TXEMPTY;
        if(ints_enabled) result |= IE;
        if(in.len > 0)   result |= RXVALID;
        if(in.len == 8)  result |= RXFULL;
        if(overrun)    { result |= OVERRUN; overrun=false; }
        if(index<2 && in.len) result = in.fifo[ (in.pos + 8 - in.len--) % 8 ];
        return result;
    }
    void Write(u32 index, u32 value)
    {
        if(index<3) { Console.Putc(value); tx_emptied_int_pending = true; }
        else        { ints_enabled = value&IE; if(value&RST_RX) in.pos=in.len=0; }
    }
    bool CheckIRQ()
    {
        // TX interrupt is an edge; RX interrupt is a level.
        if(ints_enabled && (in.len > 0 || tx_emptied_int_pending))
            { tx_emptied_int_pending=false; return true; }
        return false;
    }
    void Input()
    {
        static unsigned counter = 0;
        if(!counter) counter = 2000; else { --counter; return; }
        if(Console.Hit())
        {
            if(in.len >= 8) { overrun = true; return; }
            in.fifo[ in.pos++ % 8 ] = Console.Getc();
            ++in.len;
        }
    }
};

// Xilinx interrupt timer (multi-timer chip)
template<unsigned N>
class XilinxTimer
{
    // List of registers
    enum { TCSR=0, TLR=1, TCR=2 };
    // List of control bits
    enum { UDT=2, ARHT=16, LOAD=32, ENIT=64, ENT=128, TINT=256, ENALL=1024 };
    struct impl
    {
        u32 regs[4];
        void Write(u32 index, u32 value)
        {
            if(index == TCSR)
            {
                // Setting the TINT bit actually clears it,
                // otherwise it is preserved.
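                // For example, writing ENT|TINT acknowledges a pending timer
                // interrupt (TINT is cleared in the stored value), while writing
                // plain ENT leaves a pending TINT bit untouched.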
                if(value & TINT) value &= ~TINT;
                else             value |= (regs[index] & TINT);
            }
            regs[index] = value;
        }
        bool CheckIRQ() const { return (regs[TCSR] & (TINT|ENIT)) == (TINT|ENIT); }
        void Tick()
        {
            if(!(regs[TCSR] & ENT)) return;                          // Timer does nothing unless enabled
            if(regs[TCSR] & LOAD) { regs[TCR] = regs[TLR]; return; }
            u32 previous = (regs[TCSR] & UDT) ? regs[TCR]-- : regs[TCR]++;
            if( (previous ^ regs[TCR]) == ~u32(0))                   // overflow in either direction
            {
                regs[TCSR] |= TINT;                                  // Raise the interrupt flag
                if(regs[TCSR] & ARHT) regs[TCR]  = regs[TLR];        // Restart timer
                else                  regs[TCSR] &= ~ENT;            // Stop timer
            }
        }
    } timers[N];
public:
    u32 Read(u32 which, u32 index) { return timers[which].regs[index]; }
    void Write(u32 which, u32 index, u32 value)
    {
        timers[which].Write(index, value);
        // Setting the ENALL bit on any of the timers sets ENT on all of them.
        if(value & ENALL) for(auto& t: timers) t.regs[TCSR] |= ENT;
    }
    void Tick() { for(auto& t: timers) t.Tick(); }
    bool CheckIRQ() const { for(const auto& t: timers) if(t.CheckIRQ()) return true; return false; }
};

// Xilinx OPB Interrupt Controller
class XilinxINTC
{
    // List of registers
    enum { ISR /* List of active IRQs */, IPR,
           IER /* List of IRQs that can result in an interrupt */,
           IAR, SIE, CIE, IVR, MER };
    // List of bits
    enum { ME=1, HIE=2 };
    u32 regs[8];
public:
    XilinxINTC(): regs() { }
    u32 Read(u32 index) const
    {
        if(index == IPR) return regs[ISR] & regs[IER];
        if(index == IVR) // Ordinal number of highest triggered interrupt
        {
            for(unsigned i=0; i<32; ++i)
                if(Read(IPR) & (1<<i)) return i;
            /* ... */
        }
        /* ... */
    }
    /* ... */
};

/* ... */

// The MicroBlaze CPU core.
class MicroBlazeCPU
{
    // The CPU's register set and MMU state, together with helper functions
    // for special-purpose-register access and TLB lookups.
    struct
    {
        /* ... */

        // Read or write one of the CPU's special registers (used by MFS/MTS).
        void SPR(unsigned index, bool write, u32& param)
        {
            /* ... */
                    /* ... */ (param&0xFFFFFC00,
                        [&](unsigned tlbno) -> unsigned { TLBX = tlbno; return 0; } );
                    return;
                }
            }
            if(index < 0x2000 || write) index = 15; // Fallback to a safe "undefined" slot
            // Access the processor version registers
            if(write) PVRs[index&15] = param;
            else      param = PVRs[index&15];
            #undef r
        }

        /* Virtual memory management unit (MMU) function */
        template<unsigned rw, typename T>
        unsigned TLBfind(unsigned virt_addr, const T& func) const
        {
            // Cache the last used index for this type of data access
            static unsigned last_matched_index = 0;
            bool UserMode = UM;
            unsigned exception_no = 16+2+4; // Exception for "no tlb"
            for(unsigned vv=0; vv<64; ++vv)
            {
                unsigned v = (last_matched_index + vv) % 64; // Index into TLB array
                auto& tlb = UTLB[v];
                // Skip invalid TLBs
                if(!tlb.b.V) continue;
                // Skip TLB where TID mismatches the PID
                if(tlb.w.TID != 0 && PID != tlb.w.TID) continue;
                unsigned page_bits = 10 + tlb.b.SIZE * 2;
                // Skip TLB where TAG mismatches the given address
                if( u32(virt_addr ^ (tlb.b.TAG<<10)) >> page_bits ) continue;
                unsigned zone_type = (ZPR >> (30 - tlb.b.ZSEL*2)) & 3;
                // Remove the bit indicating "no tlb found" exception
                exception_no &= ~2;
                if(zone_type == 0 && UserMode) continue; // zone-prohibited access
                // Remove the bit indicating "zone protection" exception
                exception_no &= ~4;
                if(zone_type < (UserMode?3:2)) // Check WR and EX, unless exempted by zone
                {
                    if(rw==intent_store   && !tlb.b.WR) continue; // write access prohibited
                    if(rw==intent_execute && !tlb.b.EX) continue; // execute access prohibited
                }
                if(rw==intent_execute && tlb.b.G) continue; // I/O space is guarded from execution
                last_matched_index = v;
                return func(v);
            }
            return exception_no;
        }
    } regs;
public:
    /* List of read/write functions for each 64k page of the memory space */
    struct { u32(*read)(u32); void(*write)(u32,u32); } IOmap[0x10000];

    /* All memory access is routed through this template function. */
    /* It handles reads and writes of different sizes,
     * with or without virtual memory mapping,
     * with or without byte order swaps (reverse).
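     *
     * For example, a one-byte store to physical address 0x10000003 first reads
     * the containing 32-bit word through IOmap, patches the relevant byte in
     * place (through MemDataRef), and then writes the whole word back; only a
     * full 32-bit store skips the initial read.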
     */
    template<unsigned rw, unsigned size>
    unsigned MemAccess(u32 virt_addr, u32& value, bool reverse=false)
    {
        u32 phys_addr = virt_addr;
        bool e_bit    = false;
        if(regs.VM) // Virtual memory mode? Do TLB lookup.
        {
            unsigned exception = regs.TLBfind<rw>(virt_addr, [&](unsigned tlbno) -> unsigned
            {
                // Map the address using the matching TLB.
                unsigned page_mask = (1024 << (2 * regs.UTLB[tlbno].b.SIZE)) - 1;
                phys_addr = (regs.UTLB[tlbno].b.RPN << 10) + (virt_addr & page_mask);
                e_bit     = regs.UTLB[tlbno].b.E;
                return 0;
            });
            // Produce an exception if the mapping failed (LSB indicates code/data).
            if(exception) return exception | (rw==intent_execute);
        }
        // Verify alignment, and produce an exception if the address was not properly aligned
        if(virt_addr % size != 0) return 1 | (size==4 ? 4 : 0);
        // An "E" bit in a TLB reverses the endianess for that page (except in opcode lookups).
        if(rw != intent_execute) reverse ^= e_bit;
        // This lambda byteswaps if needed due to host/guest endian differences or the reverse bit.
        auto S = [=](u32 v) -> u32 { return size>1 && (reverse != CROSS_ENDIAN) ? SwapBytes(v,size) : v; };
        // Read full 32 bits from the memory/device,
        // unless we're going to replace it entirely.
        MemDataRef<size> data;
        if(rw != intent_store || size != 4)
            data.d.value = IOmap[phys_addr/0x10000].read(phys_addr & ~3);
        // Create a reference to the relevant data within the 32 bit word.
        auto& r = data.reffun( (phys_addr & (4-size)) ^ (reverse ? (4-size) : 0) );
        if(rw == intent_store)
        {
            // Write to the relevant data, and commit the entire
            // 32-bit word to the memory/device. Byteswap if needed.
            r = S(value);
            IOmap[phys_addr/0x10000].write(phys_addr & ~3, data.d.value);
        }
        else value = S(r); // Read the relevant data. Byteswap if needed.
        return 0;          // Return "no exception" code
    }

    // This struct records the desired action to happen after instruction execution.
    struct execmode
    {
        unsigned mode:2, type:30, target:32;
        // Modes:
        //  0 = Normally proceed to the next instruction (other fields are ignored)
        //  1 = Jump, right now               (type = type of jump, target = where to jump)
        //  2 = Jump, but first execute one more instruction (delay slot)
        //  3 = Generate an exception, right now
        //      (type   = type of exception,
        //       target = memory address causing the exception)
    };

    // Decode and execute a single instruction.
    execmode ExecuteOne(u32 i, u32 arg2)
    {
        // Decode the operation type and operands from the opcode
        unsigned i_op = (i>>26)&0x3F;          // Operation
        unsigned i_rd = (i>>21)&0x1F;          // Target register index
        u32      RA   = regs.r[(i>>16)&0x1F];  // Source register value
        bool     imm  = i_op & 0x08;           // Category of instruction
        if(!imm) arg2 = regs.r[(i>>11)&0x1F];  // Second operand

        // Exit macros.
        #define Return return execmode{0,0,0}
        // Exit and store v to RD.
        // Note that regs.r[0] is always preserved as zero, even if written to.
        #define ReturnRD(v)    do { if(i_rd) regs.r[i_rd] = (v); Return; } while(0)
        // Exit and store v to RD, and set carry to c (evaluated before v, which may depend on carry).
        #define ReturnRDc(v,c) do { bool nc = (c); if(i_rd) regs.r[i_rd] = (v); regs.C = regs.CC = nc; Return; } while(0)
        // Exit and prepare to branch/jump to another memory location
        #define ReturnJmp(target,delayed,type) return execmode{(delayed)?2:1, type, target}
        // Exit and raise an exception, concerning the given address.
        #define ReturnEx(extype, addr)         return execmode{3,extype,addr}
        // Exit and raise a MMU exception, setting the exception info appropriately.
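        // The TLB-related error codes produced by TLBfind/MemAccess above are
        // built from 16+2+4: the +2 bit is cleared as soon as some TLB entry
        // matches the address, the +4 bit is cleared once the zone check passes,
        // and MemAccess ORs in +1 for instruction accesses. ReturnMMUex then
        // copies the +4 bit into the DIZ bit and strips it, leaving the
        // MicroBlaze exception codes 16/17 (data/instruction storage) and
        // 18/19 (data/instruction TLB miss).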
        #define ReturnMMUex(extype, ear, write) \
            do { regs.Rx = i_rd; regs.S = write; regs.DIZ = error & 4; \
                 ReturnEx(extype &~ 4, ear); } while(0)

        // Determine which kind of operation it was:
        if(i == 0x00000000) ReturnEx(2,0); // Specially trap this illegal opcode
        #define tr(op) (((op) & 0x07) | (((op) & 0x30) >> 1))
        switch( tr(i_op) )
        {
            // Add and subtract operations: (ADD|RSUB)(I)(K)(C); comparisons: CMP(U)
            case tr(0x06): ReturnRD(  arg2+RA+regs.C );                          // ADD(I)KC
            case tr(0x07): ReturnRD(  arg2-RA-regs.C );                          // RSUB(I)KC
            case tr(0x02): ReturnRDc( arg2+RA+regs.C, arg2+RA+regs.C < arg2 );   // ADD(I)C
            case tr(0x03): ReturnRDc( arg2-RA-regs.C, arg2-RA-regs.C > arg2 );   // RSUB(I)C
            case tr(0x00): ReturnRDc( arg2+RA,        arg2+RA < arg2 );          // ADD(I)
            case tr(0x01): ReturnRDc( arg2-RA,        arg2-RA > arg2 );          // RSUB(I)
            case tr(0x04): ReturnRD(  arg2+RA );                                 // ADD(I)K
            case tr(0x05):
                switch(imm ? 0 : (i&0x7FF))
                {
                    case 0: ReturnRD( arg2-RA );                                             // RSUB(I)K
                    case 1: ReturnRD((0x7FFFFFFF&(arg2-RA)) | (((s32)arg2 < (s32)RA)<<31) ); // CMP
                    case 3: ReturnRD((0x7FFFFFFF&(arg2-RA)) | (((u32)arg2 < (u32)RA)<<31) ); // CMPU
                }
                Return; /* Silently ignore bad instruction */

            // Bit manipulation operations: (OR|AND|XOR|ANDN)(I)
            case tr(0x20): ReturnRD( RA |  arg2 ); // OR,  ORI
            case tr(0x21): ReturnRD( RA &  arg2 ); // AND, ANDI
            case tr(0x22): ReturnRD( RA ^  arg2 ); // XOR, XORI
            case tr(0x23): ReturnRD( RA &~ arg2 ); // ANDN,ANDNI

            // Barrel shift operations: BS(RL|RA|LL)(I)
            case tr(0x11):
                if(i&0x400) ReturnRD(     RA  << (arg2 & 31) ); // BSLL(I)
                if(i&0x200) ReturnRD( s32(RA) >> (arg2 & 31) ); // BSRA(I)
                else        ReturnRD( u32(RA) >> (arg2 & 31) ); // BSRL(I)

            // Multiplications: MUL(I|H|HU|HSU)
            case tr(0x10):
                switch(imm ? 0 : (i&0x7FF))
                {
                    case 0: ReturnRD( (s32)RA * (s32)arg2 );                   // MUL,MULI
                    case 1: ReturnRD( ((s64)(s32)RA * (s64)(s32)arg2) >> 32 ); // MULH
                    case 2: ReturnRD( ((s64)(s32)RA * (s64)(u32)arg2) >> 32 ); // MULHSU
                    case 3: ReturnRD( ((u64)(u32)RA * (u64)(u32)arg2) >> 32 ); // MULHU
                }
                Return;

            // One-bit copy / shift-right operators: SEXT(8|16), SR(A|C|L); cache control: W(I|D)C
            case tr(0x24):
                switch(i & 0x7FF)
                {
                    case 0x60: ReturnRD( (s32)( s8)RA );                    // SEXT8  (sign-extend 8 bits to 32 bits)
                    case 0x61: ReturnRD( (s32)(s16)RA );                    // SEXT16 (sign-extend 16 bits to 32 bits)
                    case 0x01: ReturnRDc( (s32)RA >> 1,             RA&1);  // SRA (sign-extend)
                    case 0x21: ReturnRDc((regs.CC<<31) | (RA >> 1), RA&1);  // SRC (carry-extend)
                    case 0x41: ReturnRDc( RA >> 1,                  RA&1);  // SRL (zero-extend)
                    case 0x64: if(regs.UM) ReturnEx(7,0); Return;           // WDC
                    case 0x66: if(regs.UM) ReturnEx(7,0); Return;           // WDC.clear
                    case 0x74: if(regs.UM) ReturnEx(7,0); Return;           // WDC.flush
                    case 0x76: if(regs.UM) ReturnEx(7,0); Return;           // WDC.flush.clear
                    case 0x68: if(regs.UM) ReturnEx(7,0); Return;           // WIC (privileged cache-control instruction)
                }
                Return;

            // Conditional branches with delay slots or not: (BEQ|BNE|BLT|BLE|BGT|BGE)(I)(D)
            case tr(0x27):
                switch(i_rd & 15)
                {
                    case 0: if((s32)RA == 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; // EQ
                    case 1: if((s32)RA != 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; // NE
                    case 2: if((s32)RA <  0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; // LT
                    case 3: if((s32)RA <= 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; // LE
                    case 4: if((s32)RA >  0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; // GT
                    case 5: if((s32)RA >= 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; // GE
                }
                Return;

            // Unconditional branches / system calls, with delay slots or not: BR(A)(L)(I)(D), BRK(I), MBAR
            case tr(0x26):
                switch( (i >> 18) & 7) // This is hairy, because there are so many variants in this.
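                // Bits 20..18 of the opcode are the D (delay), A (absolute) and
                // L (link) flags, so the value switched on below is D*4 + A*2 + L.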
                {
                    #define R if(i_rd) regs.r[i_rd] = regs.PC - 4; /* Macro for saving return address */
                    case 0:   ReturnJmp(arg2+regs.PC-4, false, 0);        // D=0, A=0, L=0 (BR(I) or MBAR).
                    case 4:   ReturnJmp(arg2+regs.PC-4, true,  0);        // D=1, A=0, L=0 (BR(I)D)
                    case 5: R ReturnJmp(arg2+regs.PC-4, true,  0);        // D=1, A=0, L=1 (BRL(I)D)
                    case 2:   ReturnJmp(arg2,           false, 0);        // D=0, A=1, L=0 (BRA(I))
                    case 6:   ReturnJmp(arg2,           true,  0);        // D=1, A=1, L=0 (BRA(I)D)
                    case 7: {bool e = imm && arg2 == v_usr_ex;            // D=1, A=1, L=1 (BRAL(I)D)
                             R ReturnJmp(arg2, true, e ? 3 : 0 );}        // escalate if target 0x08
                    case 3: {bool e = imm && (arg2 == v_usr_ex || arg2 == v_brk);
                             if(regs.UM && !e) ReturnEx(7,0);             // D=0, A=1, L=1 (BRK(I) = special, not delayed)
                             regs.BIP = true;                             // deny usermode access, except
                             R ReturnJmp(arg2, false, e ? 3 : 0 );}       // escalate if target 0x08 or 0x18
                    case 1: break;                                        // D=0, A=0, L=1 (BRL(I) = invalid)
                    #undef R
                }
                Return;

            // Subroutine/exception returns: RT(S|I|B|E)D; Special register manipulation: M(F|T)S, MSR(CLR|SET)
            case tr(0x25):
                if(imm) // Note: MicroBlaze's return opcodes always include a delay slot.
                {
                    if((i_rd&7) && regs.UM) ReturnEx(7,0); // privileged return
                    ReturnJmp(RA+arg2, true, i_rd&7);      // RTSD=0 / RTID=1 / RTBD=2 / RTED=4
                }
                // MFS and MTS
                {u32& op2 = i_rd ? regs.r[i_rd] : arg2;
                 if((i&0x4000) && regs.UM) ReturnEx(7,0);                 // Write is a privileged operation
                 regs.SPR( i&0x3FFF, (i&0x4000), (i&0x4000) ? RA : op2 ); // Write or read
                 // Update the carry-copy bit
                 regs.CC  = regs.C;
                 regs.PVR = true; // Ensure this flag is never turned off
                 ReturnRD(op2);}

            // Memory access operations:
            #define HandleLoadStoreOpcode(opcode, rw, size) \
                case tr(opcode): \
                { \
                    if(auto error = MemAccess<rw,size>(RA+arg2, regs.r[i_rd], !imm && (i&0x200))) \
                        ReturnMMUex(error, RA+arg2, rw==intent_store); \
                    if(rw == intent_load && !i_rd) regs.r[i_rd] = 0;  /* Ensure RD stays zero */ \
                    if(!imm && (i&0x400)) regs.C = regs.CC = false;   /* exclusive access, not implemented */ \
                    ReturnRD(regs.r[i_rd]); \
                }
            HandleLoadStoreOpcode(0x30, intent_load, 1) // LBU(I)(R)
            HandleLoadStoreOpcode(0x31, intent_load, 2) // LHU(I)(R)
            HandleLoadStoreOpcode(0x32, intent_load, 4) // LW(I)(R)(X)
            HandleLoadStoreOpcode(0x34, intent_store,1) // SB(I)(R)
            HandleLoadStoreOpcode(0x35, intent_store,2) // SH(I)(R)
            HandleLoadStoreOpcode(0x36, intent_store,4) // SW(I)(R)(X)
        }
        #undef tr
        // Anything else: Illegal instruction exception
        ReturnEx(2,0);
    }

    // Reset the CPU
    void DoReset(u32 vector = v_reset, u32 cmdline_pointer = 0)
    {
        // Clear all registers. (Note: Real MicroBlaze does not clear R1..R31.)
        regs = {};
        // Set those registers that should not be zero:
        regs.SHR  = 0xFFFFFFFFu;
        regs.PC   = vector;
        regs.r[5] = cmdline_pointer;

        // The following CPU features are NOT supported by this emulator:
        //
        //   C_USE_PCMP_INSTR       not implemented (PCMPNE,PCMPEQ,PCMPBF,CLZ), because CLZ and PCMPBF require a disproportionate amount of code
        //   C_USE_DIV              not implemented (IDIV,IDIVU), because handling the corner cases requires a lot of code
        //   C_USE_FPU=1            not implemented (FADD,FRSUB,FMUL,FDIV,FCMP), because it requires lots of code (comparisons, corner cases)
        //   C_USE_FPU=2            not implemented (FLT,FINT,FSQRT), because C_USE_FPU=1 is not implemented
        //   C_USE_STACK_PROTECTION not implemented, because it is not utilized by Linux
        //   C_USE_MSR_INSTR        not implemented, because the implementation takes a disproportionate amount of code and is not really needed
        //   C_FSL_LINKS            not implemented, because it is a very complex framework, yet completely ignored by Linux
        //
        // The following CPU features ARE supported by this emulator:
        //
        //   C_USE_BARREL=1,            because emulating these was cheap in my opinion, and they help the performance a great deal
        //   C_USE_HW_MUL=2,            because emulating these was cheap in my opinion, and they help the performance a great deal
        //   C_ILL_OPCODE_EXCEPTIONS=1, trivial to implement
        //   C_UNALIGNED_EXCEPTIONS=1,  trivial to implement
        //   C_OPCODE_0x0_ILLEGAL=1,    trivial to implement
        //   C_USE_MMU=3,               because without a fully functional MMU, Linux would be quite crippled in my opinion
        //   C_MMU_ZONES=16,            because once you support one, supporting 16 is trivial. Linux requires at least 2.
        //   C_MMU_PRIVILEGED_INSTR=0,  fully protect privileged instructions
        //   C_MMU_TLB_ACCESS=3,        full TLB register access
        //
        // Now, set up bit fields indicating the list of processor features.
        // For brevity (to save time in the video), I use hexadecimal constants here.
        regs.PVR      = true;
        regs.PVRs[0]  = 0xD4801500;
        regs.PVRs[2]  = 0x00005470;
        regs.PVRs[11] = 0xC0200000 | regs.MSR;
    }

    bool InterruptsEnabled() const { return regs.IE && !regs.EIP && !regs.BIP; }

    // Process an interrupt request (note: Call this only when InterruptsEnabled()=true).
    void DoInterrupt(u32 vector = v_intr)
    {
        regs.PushUVM(); // Go to kernel mode, save current mode
        regs.IE    = false;
        regs.r[14] = regs.PC;
        regs.PC    = vector;
    }

    // Fetch and execute one instruction, and process branch/jump/exception
    void Singlestep()
    {
        execmode next;
        auto NextOpcode = [&] (u32& value) -> bool
        {
            unsigned error = MemAccess<intent_execute,4>(regs.PC, value, !CROSS_ENDIAN);
            if(error) { next = {3, error&~4, regs.PC}; return false; } // Exception
            regs.PC += 4;
            return true;
        };
        u32 op_begin = regs.PC;
        u32 opcode;
        if(!NextOpcode(opcode))
        {
            // Loading the instruction failed; produce an exception.
            goto GotException;
        }
        // HACK: When the Linux function "emit_log_char" is invoked, output the
        // character directly to the UART. This makes the boot process more
        // verbose while the kernel UART driver has not been loaded yet.
        if(op_begin == 0xc0010e04) IOmap[0x8400].write(0, regs.r[5]<<24);

        {s32 immval = (s16)opcode;
         // If the opcode was IMM, read the next one immediately.
         if( (opcode >> 16) == 0xB000 )
         {
             if(!NextOpcode(opcode)) goto GotException;
             immval = (immval << 16) | (opcode & 0xFFFF); // Extend the literal parameter
         }
         next = ExecuteOne(opcode, immval);}

        switch(next.mode)
        {
            case 0: default: break;
            case 2: // Delayed jump
            {
                u32 btr = next.target; // Save the branch target
                if(!NextOpcode(opcode)) { next.mode = 2; regs.BTR = btr; goto GotException; }
                // Execute the in-delay-slot instruction.
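                // If this instruction faults, the code below saves the branch
                // target into BTR and sets ESR.DS, so that the exception handler
                // can later resume the interrupted branch through BTR.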
                // Note: MicroBlaze documentation explicitly prohibits the delay
                // slot being taken by an IMM, BRK or branch instruction,
                // so we won't deal with those cases here.
                execmode next2 = ExecuteOne(opcode, (s16)opcode);
                if(next2.mode == 3) // Did an exception happen in a delay slot?
                {
                    regs.BTR    = btr;
                    next.type   = next2.type;   // Copy exception type
                    next.target = next2.target; // Copy exception address
                    goto GotException;
                }
            }
            // passthru
            case 1: // Jump
                switch(next.type) // Check for a special type of a jump
                {
                    // type 1 = Return From Interrupt (set IE; copy UMS,VMS to UM,VM)
                    case 1: regs.IE  = true;  regs.PopUVM(); break;
                    // type 2 = Return From Break (clear BIP; copy UMS,VMS to UM,VM)
                    case 2: regs.BIP = false; regs.PopUVM(); break;
                    // type 3 = Break or User Exception (copy UM,VM to UMS,VMS)
                    case 3: regs.PushUVM(); break;
                    // type 4 = Return From Exception (set EE, clear EIP, zero ESR; copy UMS,VMS to UM,VM)
                    case 4: regs.EIP = false; regs.EE = true; regs.ESR = 0; regs.PopUVM(); break;
                }
                regs.PC = next.target;
                break;
            case 3: GotException:; // Exception
                if(regs.EE || next.type >= 16)
                {
                    // If EE is unset, ignore the exception unless it was caused by the MMU.
                    // For MMU exceptions, the return address is the beginning
                    // of the opcode that caused the fault. For others, it is
                    // the next opcode.
                    regs.EIP = true;           // Exception in progress = true
                    regs.EAR = next.target;    // Address causing the exception
                    regs.EE  = false;          // Exceptions enabled = false
                    regs.EC  = next.type;      // Exception code
                    regs.DS  = next.mode == 2; // Was it a delay slot exception?
                    regs.PushUVM();            // Go to kernel mode, save previous mode
                    regs.r[17] = (next.type < 16 ? regs.PC : op_begin);
                    regs.PC    = v_hw_ex;
                }
        }
    }
};

// Emulate the following hardware (the Linux kernel is configured for them using a DTS):
MicroBlazeCPU    cpu;    //                         Configured at 100 MHz
SDRAM<0x1800000> RAM;    // IO: 10000000            24 MiB of RAM (MicroBlaze Linux requires >= 16 MiB.)
SDRAM<0x10000>   LowRAM; // IO: 00000000            64 KiB of RAM (needed for exception tables)
XilinxINTC       intc;   // IO: 81800000
XilinxTimer<2>   timer;  // IO: 83c00000    IRQ 3
XilinxUARTlite   uart;   // IO: 84000000    IRQ 8

// Helper function for swapping byte order on memory access.
static u32 S(u32 v) { return CROSS_ENDIAN ? SwapBytes(v, 4) : v; }

int main()
{
    // Set up the memory mapping.
    // First, make all unassigned addresses produce a run-time error.
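    // Each IOmap entry covers one 64 KiB page of the 32-bit physical address
    // space (MemAccess indexes it with address/0x10000), so e.g. the UART at
    // physical address 0x84000000 is reached through cpu.IOmap[0x8400].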
    for(unsigned a=0; a<0x10000; ++a)
        cpu.IOmap[a] = { [](u32)->u32 { abort(); },
                         [](u32,u32)  { abort(); } };
    cpu.IOmap[0x0000] = { [](u32 i)       { return LowRAM.Read(i/4); },
                          [](u32 i,u32 v) { LowRAM.Write(i/4,v); } };
    for(unsigned a=0x1000; a<0x1180; ++a)
        cpu.IOmap[a] = { [](u32 i)       { return RAM.Read((i-0x10000000)/4); },
                         [](u32 i,u32 v) { RAM.Write((i-0x10000000)/4,v); } };
    cpu.IOmap[0x8180] = { [](u32 i)       { return S(intc.Read((i/4)%8)); },
                          [](u32 i,u32 v) { intc.Write((i/4)%8, S(v)); } };
    cpu.IOmap[0x8400] = { [](u32 i)       { return S(uart.Read((i/4)%4)); },
                          [](u32 i,u32 v) { uart.Write((i/4)%4, S(v)); } };
    cpu.IOmap[0x83C0] = { [](u32 i)       { return S(timer.Read((i/16)&1, (i/4)%4)); },
                          [](u32 i,u32 v) { timer.Write((i/16)&1, (i/4)%4, S(v)); } };

    // Load the Linux kernel from a U-Boot image.
    struct { u32 a[3], image_size, load_offs, entry_offs, b[10]; } Header;
    const char* kernel = "simple~1.ub"; // simpleImage.emu.ub
    FILE* fp = fopen(kernel, "rb");
    fprintf(stderr, "Loading kernel, %s...\n", kernel);
    fread(&Header, 0x40, 1, fp);
    if(LITTLE_ENDIAN_HOST) Header.load_offs  = SwapBytes(Header.load_offs,  4);
    if(LITTLE_ENDIAN_HOST) Header.entry_offs = SwapBytes(Header.entry_offs, 4);
    if(LITTLE_ENDIAN_HOST) Header.image_size = SwapBytes(Header.image_size, 4);

    // Reset the CPU (and bring the memory mapper to user mode)
    cpu.DoReset(Header.entry_offs, Header.load_offs + Header.image_size);

    // Read the kernel image into the RAM
    for(auto offs = Header.load_offs; offs < Header.load_offs + Header.image_size; offs += 4)
    {
        u32 value;
        if(fread(&value, 1, 4, fp) <= 0) break;
        cpu.MemAccess<intent_store,4> (offs, value, CROSS_ENDIAN);
    }
    fclose(fp);
    fprintf(stderr, "Done loading kernel.\n");

    // Run the machine
    for(;;) // Infinite loop
    {
        cpu.Singlestep();
        uart.Input();
        timer.Tick();
        if(timer.CheckIRQ()) intc.TriggerIRQ(3);
        if(cpu.InterruptsEnabled())
        {
            if(uart.CheckIRQ()) intc.TriggerIRQ(8);
            if(intc.CheckIRQ()) cpu.DoInterrupt();
        }
    }
}
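
// A minimal sketch of how this might be built, assuming a toolchain that
// provides <conio.h> for the console polling above (e.g. MinGW on Windows,
// or a small kbhit()/getch() shim elsewhere); the source file name is assumed:
//
//     g++ -std=c++11 -O2 microblaze-emu.cc -o microblaze-emu
//
// The emulator then expects the U-Boot kernel image "simple~1.ub" (see main()
// above) in the current working directory.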