// ============================================================================
// NES video output: NTSC signal synthesis/decoding and SDL presentation.
//
// NOTE(review): this text is NOT compilable as it stands and should be
// restored from a pristine copy rather than repaired by hand:
//   * The whole file sits on three physical lines, so every `//` comment
//     swallows the code that follows it on the same line.
//   * Several spans are missing. Each gap is consistent with text having been
//     dropped from a `<` directly followed by a letter up to the next `>`
//     (presumably an HTML/tag stripper -- confirm against the original):
//       - the header names after each `#include`;
//       - the template parameter list of cachedFunction (the body still uses
//         `extent` and `T`);
//       - the size argument of `std::bitset`;
//       - in FlushScanline, the text between `px` and `=0 && t<256*12)`,
//         which must have held the per-pixel loop header and the
//         declarations of `y`, `i`, `q`, `gamma`, `t` and `v`;
//       - in FlushScanline, the text between `y` and `pixels)`, which must
//         have held the row loop header and the declaration of `pix`.
//   * `u16`, `signal`/`SIGINT` and the SDL names are used but not declared in
//     the visible text.
//
// What the visible code does:
//
// cachedFunction(param, f)
//   Memoizes f(param) in function-local static storage: a bitset marks which
//   slots are filled and `results[param]` holds the value. The first call for
//   a given param evaluates f and stores the result; later calls return a
//   const reference to the stored value.
//
// IO::Init()
//   Calls SDL_Init and SDL_InitSubSystem (both with SDL_INIT_VIDEO), opens an
//   xres x yres (960x720) 32-bit surface with flags 0 and stores it in `s`,
//   then resets SIGINT to its default handler. No return value is checked.
//
// IO::RGBresult
//   `v` holds the 12 signal samples generated for one pixel; `decay` holds the
//   quantized filter state packed into bits 9-10 and 11-12.
//
// IO::UpdateSignal(pixel)
//   Bit layout read from `pixel`: bits 0-3 color, bits 4-5 level (forced to 1
//   when color >= 0xE), bits 0x40/0x80/0x100 select attenuation, bits 9-10 and
//   11-12 carry the incoming decay07/decay15 state (each 0..3, scaled by 1/3).
//   For each of 12 samples it picks a low or high voltage from `levels`,
//   multiplies by `attenuation` where an attenuation bit and its wave phase
//   coincide, normalizes against black/white, runs two recursive filters
//   (decay07, decay15) and stores their blend divided by nsamples. The final
//   filter states are clamped to 0..3 and packed into `res.decay`.
//
// File-level state (namespace IO)
//   scanlines : 240 rows of 256*12 signal samples.
//   prev1     : last value written per pixel, indexed by [Tweak/4][py][px].
//   prev2     : decoded 32-bit output rows, indexed by [tweaks[py]/4][py].
//   tweaks    : value of Tweak recorded for each scanline.
//   Decay     : decay bits carried from one PutPixel call to the next.
//   Tweak     : advanced by length*4 modulo 12 at every FlushScanline call.
//   diffs     : per-scanline flag, set when a pixel changed since last decode.
//
// IO::PutPixel(px, py, pixel)
//   Adds the carried Decay bits to `pixel`, looks up the cached UpdateSignal
//   result (cache extent 1<<13), copies its 12 samples to
//   scanlines[py][px*12], stores the returned decay bits in Decay, and marks
//   diffs[py] when prev1 does not already hold pixel^0x8000. px and py are not
//   range-checked.
//
// IO::FlushScanline(py, length)
//   Records Tweak for rows below 240. When py == 239 (the `++counter%1 == 0`
//   term is always true) it walks all 240 rows under an OpenMP parallel-for,
//   reusing the parameter `py` as the loop variable; rows flagged in `diffs`
//   are re-decoded into prev2 from accumulated y/i/q sums via the three
//   clamp(255.95 * gammafix(...)) channel expressions, each row is copied to
//   output rows y1..y2 of the surface, and SDL_Flip presents the frame.
//   On every call Decay is reset to 0 and Tweak is advanced.
// ============================================================================
#include // for std::memset #include // for std::bitset #include // for std::cos, std::pow #include // Generic mechanism for caching function call results template auto cachedFunction(unsigned param, const T& f) -> const decltype(f(param))& { static std::bitset cached; static decltype(f(param)) results[extent] = {}; if(cached.test(param)) return results[param]; cached.set(param); return results[param] = f(param); } namespace IO { SDL_Surface *s; const int nsamples = 12, xres=960, yres=720; void Init() { SDL_Init(SDL_INIT_VIDEO); SDL_InitSubSystem(SDL_INIT_VIDEO); s = SDL_SetVideoMode(xres, yres, 32,0); signal(SIGINT, SIG_DFL); } struct RGBresult { float v[12]; int decay; }; // This function takes a NES color index and produces NTSC video signal // corresponding to that color. A few bits in the color index are also // used to convey information related to waveform smoothing. RGBresult UpdateSignal(unsigned pixel) { // Decode the color index. int color = (pixel & 0x0F), level = color<0xE ? (pixel>>4) & 3 : 1; float decay07 = ((pixel >> 9) & 3) / 3.f; float decay15 = ((pixel >>11) & 3) / 3.f; RGBresult res; // Voltage levels, relative to synch voltage static const float black=.518f, white=1.962f, attenuation=.746f, levels[8] = {.350f, .518f, .962f,1.550f, // Signal low 1.094f,1.506f,1.962f,1.962f}; // Signal high // Calculate the luma and chroma by emulating the relevant circuits: auto wave = [](int p, int color) { return (color+8+p)%12 < 6; }; for(int p=0; p<12; ++p) // 12 clock cycles per pixel. { // NES NTSC modulator (square wave between two voltage levels): float spot = levels[level + 4*(color <= 12*wave(p,color))]; // De-emphasis bits attenuate a part of the signal: if(((pixel & 0x40) && wave(p,12)) || ((pixel & 0x80) && wave(p, 4)) || ((pixel &0x100) && wave(p, 8))) spot *= attenuation; // Normalize: float v = (spot - black) / (white-black) - 0.5f; // Apply slight signal degradation. 
decay07 = decay07*0.3f + 0.7f*v; decay15 = decay15*-.5f + 1.5f*v; res.v[p] = (0.5f + decay07*0.7f + decay15*0.3f)/float(nsamples); } int d07 = int(decay07*3.f); if(d07<0) d07=0; if(d07>3) d07=3; int d15 = int(decay15*3.f); if(d15<0) d15=0; if(d15>3) d15=3; res.decay = (d07<<9) + (d15<<11); return res; } static float scanlines[240][256*12] = {{}}; static u16 prev1[3][240][256]={{{}}}; static unsigned prev2[3][240][xres]={{{}}}, tweaks[240]={0}; static unsigned Decay=0, Tweak=0; static bool diffs[240] = {false}; void PutPixel(unsigned px,unsigned py, unsigned pixel) { // The input value is a NES color index (with de-emphasis bits). // We need RGB values. To produce a RGB value, we emulate the NTSC circuitry. pixel += Decay; const RGBresult& res = cachedFunction< (1<<13) >(pixel, UpdateSignal); std::memcpy(&scanlines[py][px*12], &res.v[0], sizeof(res.v)); Decay = res.decay; u16 v=pixel^0x8000, &p = prev1[Tweak/4][py][px]; if(p != v) { p = v; diffs[py] = true; } } void FlushScanline(unsigned py, unsigned length) { static unsigned counter=0; if(py < 240) tweaks[py] = Tweak; if(py == 239 && ++counter%1 == 0) { // Simulate TV NTSC demodulator for this scanline #pragma omp parallel for schedule(guided) for(py=0; py<240; ++py) { unsigned y1 = (py )*yres/240; unsigned y2 = (py+1)*yres/240; auto& target = prev2[tweaks[py]/4][py]; auto& scanline = scanlines[py]; if(diffs[py]) { //printf("%u\t", py); fflush(stdout); auto cosf = [](int p) { return std::cos(3.141592653 * p / 6); }; for(unsigned px=0; px=0 && t<256*12) ? scanline[t] : 0.f; y += v; i += v * cachedFunction<12>((t+12)%12, cosf) * 1.2; q += v * cachedFunction<12>((t+21)%12, cosf) * 1.2; } auto gammafix = [=](float f) { return f <= 0.f ? 0.f : std::pow(f, 2.2f / gamma); }; auto clamp = [](int v) { return v<0 ? 0 : v>255 ? 
255 : v; }; target[px] = 0x10000*clamp(255.95 * gammafix(y + 0.946882f*i + 0.623557f*q)) + 0x00100*clamp(255.95 * gammafix(y + -0.274788f*i + -0.635691f*q)) + 0x00001*clamp(255.95 * gammafix(y + -1.108545f*i + 1.709007f*q)); } diffs[py] = false; } for(unsigned y=y1; ypixels) + y*xres; std::memcpy(pix, target, sizeof(target)); } } SDL_Flip(s); } Decay = 0; Tweak = (Tweak + length*4) % 12; } }