diff -NaHudr dosbox-0.73/src/dos/dos.cpp dosbox-0.73-patched/src/dos/dos.cpp --- dosbox-0.73/src/dos/dos.cpp 2009-05-25 21:44:45.000000000 +0300 +++ dosbox-0.73-patched/src/dos/dos.cpp 2010-04-04 00:55:51.620788494 +0300 @@ -32,6 +32,12 @@ #include "support.h" #include "serialport.h" +#include /* FOR BISQWIT LINUX SHELL SUPPORT */ +#include /* FOR BISQWIT LINUX SHELL SUPPORT */ +#include /* FOR BISQWIT LINUX SHELL SUPPORT */ +#include /* FOR BISQWIT LINUX SHELL SUPPORT */ +#include /* FOR BISQWIT LINUX SHELL SUPPORT */ + DOS_Block dos; DOS_InfoBlock dos_infoblock; @@ -755,6 +761,77 @@ //TODO Think hard how shit this is gonna be //And will any game ever use this :) case 0x53: /* Translate BIOS parameter block to drive parameter block */ + /* BISQWIT LINUX SHELL SUPPORT */ + { + switch(reg_al) + { + case 0: /* open */ + { + int fd = -1, pid = -1; + struct winsize ws; + memset(&ws, 0, sizeof ws); + ws.ws_row = reg_cx; + ws.ws_col = reg_dx; + + pid = forkpty(&fd, NULL, NULL, &ws); + if(!pid) + { + putenv("TERM=linux"); + execl(getenv("SHELL"), getenv("SHELL"), NULL); + // not reached + } + fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK); + reg_bx = fd; + reg_cx = pid; + break; + } + case 1: /* close */ + { + kill(reg_cx, SIGKILL); + close(reg_bx); + waitpid(reg_cx, NULL, 0); + break; + } + case 2: /* send */ + { + int fd = reg_bx; Bit16u towrite = reg_cx; + unsigned char* bisqdata = new unsigned char[towrite]; + MEM_BlockRead(SegPhys(ds)+reg_dx, bisqdata, towrite); + reg_cx = write(fd, bisqdata, towrite); + fprintf(stderr, "Sent <%.*s>\n", (int)reg_cx, bisqdata); + delete[] bisqdata; + break; + } + case 3: /* read */ + { + int fd = reg_bx; Bit16u toread = reg_cx; + if(toread > 0) + { + unsigned char* bisqdata = new unsigned char[toread]; + int r = read(fd, bisqdata, toread); + if(r < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + r = 0; + if(r > 0) MEM_BlockWrite(SegPhys(ds)+reg_dx, bisqdata, r); + reg_cx = r; + reg_dx = r < 0 ? 
errno : 0; + if(r > 0) fprintf(stderr, "Read <%.*s>\n", r, bisqdata); + delete[] bisqdata; + } + break; + } + case 4: /* resize */ + { + struct winsize ws; + memset(&ws, 0, sizeof(ws)); + ws.ws_row = reg_cx; + ws.ws_col = reg_dx; + ioctl(reg_bx, TIOCSWINSZ, &ws); + break; + } + } + break; + } + /* END BISQWIT SSH SUPPORT */ E_Exit("Unhandled Dos 21 call %02X",reg_ah); break; case 0x54: /* Get verify flag */ diff -NaHudr dosbox-0.73/src/gui/sdlmain.cpp dosbox-0.73-patched/src/gui/sdlmain.cpp --- dosbox-0.73/src/gui/sdlmain.cpp 2009-05-25 21:44:46.000000000 +0300 +++ dosbox-0.73-patched/src/gui/sdlmain.cpp 2010-07-07 19:37:28.404438512 +0300 @@ -51,6 +51,9 @@ #include "cross.h" #include "control.h" +#include "../nesvideos-piece.hh" +#include "hardware.h" + //#define DISABLE_JOYSTICK #if C_OPENGL @@ -1647,6 +1650,8 @@ //extern void UI_Init(void); int main(int argc, char* argv[]) { + bool WillLaunchVideoLog = false; + try { CommandLine com_line(argc,argv); Config myconf(&com_line); @@ -1659,6 +1664,8 @@ if(control->cmdline->FindString("-editconf",editor,false)) launcheditor(); if(control->cmdline->FindString("-opencaptures",editor,true)) launchcaptures(editor); if(control->cmdline->FindExist("-eraseconf")) eraseconfigfile(); + + WillLaunchVideoLog = control->cmdline->FindExist("-videolog"); /* Can't disable the console with debugger enabled */ #if defined(WIN32) && !(C_DEBUG) @@ -1818,8 +1825,19 @@ MAPPER_Init(); if (control->cmdline->FindExist("-startmapper")) MAPPER_Run(false); /* Start up main machine */ + + if(WillLaunchVideoLog) + { + fprintf(stderr, "Videolog enabled with env VIDEOLOG=%s\n", getenv("VIDEOLOG")); + LoggingEnabled=2; + CaptureState |= CAPTURE_VIDEO | CAPTURE_WAVE; + NESVideoSetVideoCmd(getenv("VIDEOLOG")); + } + control->StartUp(); /* Shutdown everything */ + + } catch (char * error) { GFX_ShowMsg("Exit to error: %s",error); fflush(NULL); diff -NaHudr dosbox-0.73/src/hardware/hardware.cpp dosbox-0.73-patched/src/hardware/hardware.cpp --- 
dosbox-0.73/src/hardware/hardware.cpp 2009-05-25 21:44:46.000000000 +0300 +++ dosbox-0.73-patched/src/hardware/hardware.cpp 2010-08-22 12:01:15.916745179 +0300 @@ -31,6 +31,8 @@ #include "render.h" #include "cross.h" +#include "../nesvideos-piece.hh" + #if (C_SSHOT) #include #include "../libs/zmbv/zmbv.cpp" @@ -38,7 +40,7 @@ static std::string capturedir; extern const char* RunningProgram; -Bitu CaptureState; +Bitu CaptureState = 0; #define WAVE_BUF 16*1024 #define MIDI_BUF 4*1024 @@ -300,6 +302,202 @@ #endif void CAPTURE_AddImage(Bitu width, Bitu height, Bitu bpp, Bitu pitch, Bitu flags, float fps, Bit8u * data, Bit8u * pal) { + { + + static float OldFPS = 0; + if(fps != OldFPS) + { + fprintf(stderr, "\33[1m%d: FPS reported now as %g\33[m\n", getpid(), (double)fps); + OldFPS = fps; + } + + Bit8u* origdata = data; + Bit8u* curdata = data; + + const unsigned FrameShift = 3; + static unsigned FrameCounter = 0; + + //fprintf(stderr, "Getting %u\n", FrameCounter);fflush(stderr); + + /*if(width == 320 && height == 200) + { + static Bit8u x640x400[FrameShift][640*400]; + data = x640x400[FrameCounter]; + for(unsigned p=0,y=0; y<200; ++y) + { + for(unsigned d=y*2*640, x=0; x<320; ++x,++p, d+=2) + { + unsigned char c = origdata[p]; + data[d+0] = c; + data[d+1] = c; + } + memcpy(&data[(y*2+1)*640], &data[(y*2+0)*640], 640); + } + width=640; height=400; + }*/ + if(width == 320 && height == 200) + { + static Bit8u x640x200[FrameShift][640*200]; + data = x640x200[FrameCounter]; + for(unsigned p=0,y=0; y<200; ++y) + { + for(unsigned d=y*640, x=0; x<320; ++x,++p, d+=2) + { + unsigned char c = origdata[p]; + data[d+0] = c; + data[d+1] = c; + } + } + width=640; height=200; + curdata = data; + } + if(width == 640 && (height == 200 || height == 400)) + { + static Bit8u x1280x400[FrameShift][1280*400]; + data = x1280x400[FrameCounter]; + if(height == 200) + { + for(unsigned p=0,y=0; y<200; ++y) + { + for(unsigned d=y*2*1280, x=0; x<640; ++x,++p, d+=2) + { + unsigned char c = 
curdata[p]; + data[d+0] = c; + data[d+1] = c; + } + memcpy(&data[(y*2+1)*1280], + &data[(y*2+0)*1280], 1280); + } + } + else if(height == 400) + { + for(unsigned p=0,y=0; y<400; ++y) + { + for(unsigned d=y*1280, x=0; x<640; ++x,++p, d+=2) + { + unsigned char c = curdata[p]; + data[d+0] = c; + data[d+1] = c; + } + } + } + width=1280; height=400; + /* +static char TextBuf[] = +{ + "........&&&&&&...&&....&&&.&&&....&&&&&&\n" + "..........&&.....&&....&&.&.&&....&&&...\n" + "..........&&.....&&....&&...&&....&&&&&&\n" + "................................................................\n" + "..AAAAA.....HHHHH.......OOOOO.....aaaaa.......hhhhh.....ooooo...\n" + "BB.A.A.CC.II.H.H.JJ...PP.O.O.QQ.bb.a.a.cc...ii.h.h.jj.pp.o.o.qq.\n" + "BBB...CCC.III...JJJ...PPP...QQQ.bbb...ccc...iii...jjj.ppp...qqq.\n" + "BBB...CCC.III...JJJ.#.PPP...QQQ.bbb...ccc.#.iii...jjj.ppp...qqq.\n" + "BB.....CC.II.....JJ...PP.....QQ.bb.....cc...ii.....jj.pp.....qq.\n" + ".DDDDDDD...KKKKKKK.....RRRRRRR...ddddddd.....kkkkkkk...rrrrrrr..\n" + "EE.....GG.LL.....NN...SS.....UU.ee.....gg...ll.....nn.ss.....uu.\n" + "EEE...GGG.LLL...NNN.#.SSS...UUU.eee...ggg.#.lll...nnn.sss...uuu.\n" + "EEE...GGG.LLL...NNN...SSS...UUU.eee...ggg...lll...nnn.sss...uuu.\n" + "EE.F.F.GG.LL.M.M.NN...SS.T.T.UU.ee.f.f.gg...ll.m.m.nn.ss.t.t.uu.\n" + "..FFFFF.....MMMMM.......TTTTT.....fffff.......mmmmm.....ttttt...\n" +}; + static const char DigMask[7*10] = + { 1,1,1,0,1,1,1, + 0,0,1,0,0,0,1, + 1,0,1,1,1,1,0, + 1,0,1,1,0,1,1, + 0,1,1,1,0,0,1, + 1,1,0,1,0,1,1, + 1,1,0,1,1,1,1, + 1,1,1,0,0,0,1, + 1,1,1,1,1,1,1, + 1,1,1,1,0,1,1 }; + time_t t = time(NULL); + struct tm* tm = localtime(&t); + + unsigned digits[6] = + { + tm->tm_hour / 10, + tm->tm_hour % 10, + tm->tm_min / 10, + tm->tm_min % 10, + tm->tm_sec / 10, + tm->tm_sec % 10 + }; + char Enabled[256] = { 0 }; + Enabled['#'] = 2; + Enabled['&'] = 3; + for(unsigned d=0; d<6; ++d) + for(unsigned c=0; c<7; ++c) + Enabled["ABCDEFG""HIJKLMN""OPQRSTU" + "abcdefg""hijklmn""opqrstu"[d*7+c]] 
+ = DigMask[c + 7*digits[d]] ? 2 : 1; + unsigned x = 1200; + unsigned y = 360; + for(const char* s = TextBuf; *s; ++s) + { + bool cross = (x^y)&1; + if(*s == '\n') { ++y; x = 1200; continue; } + switch(Enabled[*s]) + { + case 1: data[y*1280+x] = cross?1:0; break; + case 2: data[y*1280+x] = cross?11:15; break; + case 3: data[y*1280+x] = cross?6:14; break; + case 0: if(cross) data[y*1280+x] = 0; break; + } + ++x; + }*/ + } + + static Bit8u* Frames[FrameShift]; + //fprintf(stderr, "Got %u: %p\n", FrameCounter, data);fflush(stderr); + if(data == origdata) + { + fprintf(stderr, "This, I was not prepared for: %ux%u\n", + (unsigned)width, (unsigned)height); + } + Frames[FrameCounter] = data; + + if(++FrameCounter < FrameShift)return; + FrameCounter = 0; + + //fprintf(stderr, "Plushing\n");fflush(stderr); + + Bit8u* nesvdata = data; + if(bpp == 8) + { + nesvdata = new Bit8u[width*height*3]; + for(unsigned p=0; pGet_path("captures"); capturedir = proppath->realpath; CaptureState = 0; + fprintf(stderr, "CaptureState set to %u\n", CaptureState); MAPPER_AddHandler(CAPTURE_WaveEvent,MK_f6,MMOD1,"recwave","Rec Wave"); MAPPER_AddHandler(CAPTURE_MidiEvent,MK_f8,MMOD1|MMOD2,"caprawmidi","Cap MIDI"); #if (C_SSHOT) diff -NaHudr dosbox-0.73/src/Makefile.am dosbox-0.73-patched/src/Makefile.am --- dosbox-0.73/src/Makefile.am 2009-04-28 10:02:37.000000000 +0300 +++ dosbox-0.73-patched/src/Makefile.am 2010-04-04 01:30:39.280785772 +0300 @@ -11,10 +11,12 @@ .rc.o: $(WINDRES) -o $@ $< -dosbox_SOURCES = dosbox.cpp $(ico_stuff) +dosbox_SOURCES = dosbox.cpp nesvideos-piece.cc rgbtorgb.cc $(ico_stuff) dosbox_LDADD = cpu/libcpu.a debug/libdebug.a dos/libdos.a fpu/libfpu.a hardware/libhardware.a gui/libgui.a \ ints/libints.a misc/libmisc.a shell/libshell.a hardware/serialport/libserial.a libs/gui_tk/libgui_tk.a EXTRA_DIST = winres.rc dosbox.ico +dosbox_LDADD += -lutil -lgd -lx264 + diff -NaHudr dosbox-0.73/src/nesvideos-piece.cc dosbox-0.73-patched/src/nesvideos-piece.cc --- 
dosbox-0.73/src/nesvideos-piece.cc 1970-01-01 02:00:00.000000000 +0200 +++ dosbox-0.73-patched/src/nesvideos-piece.cc 2010-07-07 20:50:34.588435122 +0300 @@ -0,0 +1,1295 @@ +#define THREAD_SAFETY + +#include +#include +#include +#include +#include +#include + +#include // mknod, unlink, write +#include +#include // S_IFIFO +#include // fcntl +#include // poll +#include // setenv +#include // strrchr +#include // flock +#include +#include + +#include + +#include + +extern "C" { +#include +int x264_nal_encode( uint8_t *dst, int b_annexb, x264_nal_t *nal ); + +} + +/* Note: This module assumes everyone uses BGR32 as display depth */ + +//#define LOGO_LENGTH_HEADER (1.2) +//#define LOGO_LENGTH_OVERLAP (10.0-LOGO_LENGTH_HEADER) +//#define LOGO_LENGTH_HEADER (1.1) +//#define LOGO_LENGTH_OVERLAP (6.00-LOGO_LENGTH_HEADER) +//#define LOGO_LENGTH_OVERLAP (5.40-LOGO_LENGTH_HEADER) +//#define LOGO_LENGTH_OVERLAP (3-LOGO_LENGTH_HEADER) +//#define LOGO_LENGTH_HEADER (1.5) +#define LOGO_LENGTH_OVERLAP (0) +#define LOGO_LENGTH_HEADER (0) + +static std::string VIDEO_CMD = ""; +/* +-rawvideo on:fps=60:format=0x42475220:w=256:h=224:size=$[1024*224] +-audiofile "+AUDIO_FN+" +*/ +static std::string AUDIO_FN = "s3.log"; + +static bool Terminate=false; +static unsigned videonumber = 0; + +unsigned long long A_sent = 0; +unsigned long long V_sent = 0; + +#ifdef THREAD_SAFETY +# include +static pthread_mutex_t APIlock = PTHREAD_MUTEX_INITIALIZER; +struct ScopedLock +{ ScopedLock() { + pthread_mutex_lock(&APIlock); + //fprintf(stderr, "audio start\n"); fflush(stderr); + } + ~ScopedLock() { + //fprintf(stderr, "audio end\n"); fflush(stderr); + pthread_mutex_unlock(&APIlock); } +}; +#endif + +static unsigned NonblockWrite(FILE* fp, const unsigned char*buf, unsigned length) +{ + Retry: + int result = write(fileno(fp), buf, length); + if(result == -1 && errno==EAGAIN) + { + return 0; + } + if(result == -1 && errno==EINTR) goto Retry; + if(result == -1) + { + perror("write"); + Terminate=true; + 
return 0; + } + return result; +} +static int WaitUntilOneIsWritable(FILE*f1, FILE*f2, int whichmask = 3) +{ + struct pollfd po[2] = { {fileno(f1),POLLOUT,0}, {fileno(f2),POLLOUT,0} }; + + pollfd* po_ptr = po; + unsigned po_n = 2; + + if(whichmask == 1) // f1 only + { po_n = 1; po[1].revents = 0; } + else if(whichmask == 2) // f2 only + { po_ptr += 1; po[0].revents = 0; } + + poll(po_ptr, po_n, -1); + + return ((po[0].revents & POLLOUT) ? 1 : 0) + | ((po[1].revents & POLLOUT) ? 2 : 0); +} + +#define BGR32 0x42475220 // BGR32 fourcc +#define BGR24 0x42475218 // BGR24 fourcc +#define BGR16 0x42475210 // BGR16 fourcc +#define BGR15 0x4247520F // BGR15 fourcc +#define I420 0x30323449 // I420 fourcc +#define YUY2 0x32595559 // YUY2 fourcc + +static unsigned USE_FOURCC = BGR32; +static unsigned INPUT_BPP = 32; + +#define u32(n) (n)&255,((n)>>8)&255,((n)>>16)&255,((n)>>24)&255 +#define u16(n) (n)&255,((n)>>8)&255 +#define s4(s) s[0],s[1],s[2],s[3] + +static const unsigned FPS_SCALE = 0x1000000; + +static struct Construct +{ + Construct() + { + char Buf[4096]; + getcwd(Buf,sizeof(Buf)); + Buf[sizeof(Buf)-1]=0; + AUDIO_FN = Buf + std::string("/") + AUDIO_FN; + } +} Construct; + +namespace LogoInfo +{ + unsigned width; + unsigned height; + + bool SentVideo = false; + bool SentAudio = false; + int OverlapSent = 0; +} + + +class AVI +{ +public: + AVI() { } + virtual ~AVI() { } + + virtual void Audio + (unsigned r,unsigned b,unsigned c, + const unsigned char*d, unsigned nsamples) = 0; + + virtual void Video + (unsigned w,unsigned h,unsigned f, const unsigned char*d) = 0; + + virtual void SaveState(const std::string&) { } + virtual void LoadState(const std::string&) { } +}; + +class NormalAVI: public AVI +{ + FILE* vidfp; + FILE* audfp; + + bool KnowVideo; + unsigned vid_width; + unsigned vid_height; + unsigned vid_fps_scaled; + std::list > VideoBuffer; + unsigned VidBufSize; + + bool KnowAudio; + unsigned aud_rate; + unsigned aud_chans; + unsigned aud_bits; + std::list > 
AudioBuffer; + unsigned AudBufSize; + +public: + NormalAVI() : + vidfp(NULL), + audfp(NULL), + KnowVideo(false), VidBufSize(0), + KnowAudio(false), AudBufSize(0) + { + } + virtual ~NormalAVI() + { + while(VidBufSize && AudBufSize) + { + CheckFlushing(); + } + if(audfp) fclose(audfp); + if(vidfp) pclose(vidfp); + unlink(AUDIO_FN.c_str()); + } + + virtual void Audio + (unsigned r,unsigned b,unsigned c, + const unsigned char*d, unsigned nsamples) + { + if(Terminate) return; + if(!KnowAudio) + { + aud_rate = r; + aud_chans = c; + aud_bits = b; + KnowAudio = true; + } + CheckFlushing(); + + unsigned bytes = nsamples * aud_chans * (aud_bits / 8); + +#if 1 + static FILE* ouf = 0; + if(!ouf) + ouf = popen("lzop -F3 | ssh -c blowfish chii 'lzop -Fd > /mnt/gbatmp/audio_out.raw'", "w"); + fwrite(d, 1, bytes, ouf); + return; +#endif + unsigned wrote = 0; + if(KnowVideo && AudioBuffer.empty()) + { + //fprintf(stderr, "Writing %u of %s from %p to %p\t", bytes, "aud", (void*)d, (void*)audfp); + wrote = NonblockWrite(audfp, d, bytes); + //fprintf(stderr, "Wrote %u\n", wrote); + A_sent += wrote; + } + if(wrote < bytes) + { + unsigned remain = bytes-wrote; + //fprintf(stderr, "Buffering %u of %s (%p..%p)\n", remain, "aud", d+wrote, d+bytes); + AudioBuffer.push_back(std::vector(d+wrote, d+bytes)); + AudBufSize += remain; + } + CheckFlushing(); + } + + virtual void Video + (unsigned w,unsigned h,unsigned f, const unsigned char*d) + { + if(Terminate) return; + if(!KnowVideo) + { + vid_width = w; + vid_height = h; + vid_fps_scaled = f; + KnowVideo = true; + } + CheckFlushing(); + + unsigned bpp = INPUT_BPP; if(bpp == 15 || bpp == 17) bpp = 16; + unsigned bytes = vid_width * vid_height * bpp / 8; + +#if 1 + static FILE* ouf = 0; + if(!ouf) + ouf = popen("lzop -F3 | ssh -c blowfish chii 'lzop -Fd > /mnt/gbatmp/video_out.raw'", "w"); + fwrite(d, 1, bytes, ouf); + return; +#endif + //std::vector tmp(bytes, 'k'); + //d = &tmp[0]; + + unsigned wrote = 0; + if(KnowAudio && VideoBuffer.empty()) 
+ { + CheckBegin(); + //fprintf(stderr, "Writing %u of %s from %p to %p\t", bytes, "vid", (void*)d, (void*)vidfp); + wrote = NonblockWrite(vidfp, d, bytes); + //fprintf(stderr, "Wrote %u\n", wrote); + V_sent += wrote; + } + + if(wrote < bytes) + { + unsigned remain = bytes-wrote; + //fprintf(stderr, "Buffering %u of %s (%p..%p)\n", remain, "vid", d+wrote, d+bytes); + + VideoBuffer.push_back(std::vector(d+wrote, d+bytes)); + VidBufSize += remain; + } + CheckFlushing(); + } + +private: + /* fp is passed as a reference because it may be NULL + * prior to calling, and this function changes it. */ + template + bool FlushBufferSome(BufType& List, unsigned& Size, FILE*& fp, const char* what) + { + what=what; + + Retry: + if(List.empty() || Terminate) return false; + + if(List.begin()->empty()) { List.erase(List.begin()); goto Retry; } + + CheckBegin(); + + typename BufType::iterator i = List.begin(); + std::vector& buf = *i; + + unsigned bytes = buf.size(); + + //fprintf(stderr, "Writing %u of %s from %p to %p\t", bytes, what, (void*)&buf[0], (void*)fp); + + unsigned ate = NonblockWrite(fp, &buf[0], bytes); + if(ate == 0) + return false; + + //fprintf(stderr, "Wrote %u\n", ate); + + if(what[0] == 'v') + V_sent += ate; + else + A_sent += ate; + + buf.erase(buf.begin(), buf.begin()+ate); + + Size -= ate; + + if(buf.empty()) + { + List.erase(i); + } + return true; + } + + void CheckFlushing() + { + //AudioBuffer.clear(); + //VideoBuffer.clear(); + + if(KnowAudio && KnowVideo && !Terminate) + { + const int LogoFramesHeader = (int)( (LOGO_LENGTH_HEADER * 60)); + const int LogoFramesOverlap = (int)( (LOGO_LENGTH_OVERLAP * 60)); + + unsigned nloops = 0; + while ((!AudioBuffer.empty() && !VideoBuffer.empty()) + || (LogoInfo::OverlapSent >= LogoFramesOverlap + && VideoBuffer.size() >= 50 + && nloops++ < 10)) + { + /* vidfp = &1, audfp = &2 */ + int attempt = WaitUntilOneIsWritable(vidfp, audfp, + (AudioBuffer.empty() ? 0 : 2) + | (VideoBuffer.empty() ? 
0 : 1) + ); + + if(attempt <= 0) break; /* Some kind of error can cause this */ + + // Flush Audio + if(attempt&2) FlushBufferSome(AudioBuffer, AudBufSize, audfp, "aud"); + + // Flush Video + if(attempt&1) FlushBufferSome(VideoBuffer, VidBufSize, vidfp, "vid"); + } + + while(FlushBufferSome(VideoBuffer, VidBufSize, vidfp, "vid")) {} + while(FlushBufferSome(AudioBuffer, AudBufSize, audfp, "aud")) {} + + /* + fprintf(stderr, "Buffer Sizes: Audio %u(%u) video %u(%u) -- sent A=%llu, V=%llu\n", + (unsigned)AudioBuffer.size(), AudBufSize, + (unsigned)VideoBuffer.size(), VidBufSize, + A_sent, V_sent); + */ + } + } + std::string GetMEncoderRawvideoParam() const + { + char Buf[512]; + unsigned bpp = INPUT_BPP; if(bpp == 15 || bpp == 17) bpp = 16; + sprintf(Buf, "fps=%g:format=0x%04X:w=%u:h=%u:size=%u", + vid_fps_scaled / (double)FPS_SCALE, + USE_FOURCC, + vid_width, + vid_height, + vid_width*vid_height * bpp/8); + return Buf; + } + std::string GetMEncoderRawaudioParam() const + { + char Buf[512]; + sprintf(Buf, "channels=%u:rate=%u:samplesize=%u:bitrate=%u", + aud_chans, + aud_rate, + aud_bits/8, + aud_rate*aud_chans*(aud_bits/8) ); + return Buf; + } + std::string GetMEncoderCommand() const + { + std::string mandatory = "-audiofile " + AUDIO_FN + + " -audio-demuxer rawaudio" + + " -demuxer rawvideo" + + " -rawvideo " + GetMEncoderRawvideoParam() + + " -rawaudio " + GetMEncoderRawaudioParam() + ; + std::string cmd = VIDEO_CMD; + + std::string::size_type p = cmd.find("NESV""SETTINGS"); + if(p != cmd.npos) + cmd = cmd.replace(p, 4+8, mandatory); + else + fprintf(stderr, "Warning: NESVSETTINGS not found in videocmd\n"); + + char videonumstr[64]; + sprintf(videonumstr, "%u", videonumber); + + for(;;) + { + p = cmd.find("VIDEO""NUMBER"); + if(p == cmd.npos) break; + cmd = cmd.replace(p, 5+6, videonumstr); + } + + fprintf(stderr, "Launch: %s\n", cmd.c_str()); fflush(stderr); + + return cmd; + } + + void CheckBegin() + { + if(!audfp) + { + unlink(AUDIO_FN.c_str()); + 
mknod(AUDIO_FN.c_str(), S_IFIFO|0666, 0); + } + + if(!vidfp) + { + /* Note: popen does not accept b/t in mode param */ + setenv("LD_PRELOAD", "", 1); + vidfp = popen(GetMEncoderCommand().c_str(), "w"); + if(!vidfp) + { + perror("Launch failed"); + } + else + { + fcntl(fileno(vidfp), F_SETFL, O_WRONLY | O_NONBLOCK); + } + } + + if(!audfp) + { + Retry: + audfp = fopen(AUDIO_FN.c_str(), "wb"); + + if(!audfp) + { + perror(AUDIO_FN.c_str()); + if(errno == ESTALE) goto Retry; + } + else + { + fcntl(fileno(audfp), F_SETFL, O_WRONLY | O_NONBLOCK); + } + } + } +}; + +class RerecordingAVI: public AVI +{ + std::map > FrameStates; + size_t aud_framesize; + size_t vid_framesize; + + FILE* vidfp; + FILE* audfp; + FILE* eventfp; + FILE* statefp; + /* + std::string vidfn; + std::string audfn; + std::string eventfn; + std::string statefn; + */ + + x264_t* x264; + x264_param_t param; + bool forcekey; + + class LockF + { + public: + LockF(FILE* f) : fp(f) { flock(fileno(fp), LOCK_EX); } + ~LockF() { flock(fileno(fp), LOCK_UN); } + private: + LockF(const LockF&); + LockF& operator=(const LockF&); + FILE* fp; + }; + +public: + RerecordingAVI(long FrameNumber) + : aud_framesize(0), + vid_framesize(0), + x264(0), + forcekey(true) + { + SetFn(); + } + virtual ~RerecordingAVI() + { + if(eventfp) + { + off_t vidpos = ftello(vidfp); + off_t audpos = ftello(audfp); + fprintf(eventfp, + "%llX %llX End\n", + (long long)vidpos, (long long)audpos); + } + if(vidfp) fclose(vidfp); + if(audfp) fclose(audfp); + if(eventfp) fclose(eventfp); + if(statefp) fclose(statefp); + + if(x264) x264_encoder_close(x264); + } + + virtual void Audio + (unsigned aud_rate,unsigned aud_bits,unsigned aud_chans, + const unsigned char*data, unsigned nsamples) + { + size_t bytes = nsamples * aud_chans * (aud_bits / 8); + size_t framesize = aud_rate * aud_chans * (aud_bits / 8); + + if(framesize != aud_framesize) + { + aud_framesize = framesize; + LockF el(eventfp); + fprintf(eventfp, "AudFrameSize %lu\n", (unsigned 
long)aud_framesize); + fflush(eventfp); + } + + LockF al(audfp); + fwrite(data, 1, bytes, audfp); + } + + virtual void Video + (unsigned vid_width,unsigned vid_height, + unsigned vid_fps_scaled, const unsigned char*data) + { + unsigned bpp = INPUT_BPP; if(bpp == 15 || bpp == 17) bpp = 16; + size_t bytes = vid_width * vid_height * bpp / 8; + size_t framesize = bytes; + + if(framesize != vid_framesize) + { + vid_framesize = framesize; + LockF el(eventfp); + fprintf(eventfp, "VidFrameSize %lu\n", (unsigned long)vid_framesize); + fflush(eventfp); + } + + LockF vl(vidfp); + + if(bpp == 12) /* For I420, we use a local X264 encoder */ + { + if(!x264) + { + x264_param_default(¶m); + x264_param_parse(¶m, "psnr", "no"); + x264_param_parse(¶m, "ssim", "no"); + param.i_width = vid_width; + param.i_height = vid_height; + param.i_csp = X264_CSP_I420; + //param.i_scenecut_threshold = -1; + //param.b_bframe_adaptive = 0; + //param.rc.i_rc_method = X264_RC_CRF; + //param.rc.i_qp_constant = 0; + x264_param_parse(¶m, "me", "dia"); + x264_param_parse(¶m, "crf", "6"); + x264_param_parse(¶m, "frameref", "8"); + param.i_frame_reference = 1; + param.analyse.i_subpel_refine = 1; + param.analyse.i_me_method = X264_ME_DIA; + /* + param.analyse.inter = 0; + param.analyse.b_transform_8x8 = 0; + param.analyse.b_weighted_bipred = 0; + param.analyse.i_trellis = 0; + */ + //param.b_repeat_headers = 1; // guess this might be needed + + param.i_fps_num = vid_fps_scaled; + param.i_fps_den = 1 << 24; + + x264 = x264_encoder_open(¶m); + if(!x264) + { + fprintf(stderr, "x264_encoder_open failed.\n"); + goto raw_fallback; + } + } + + const size_t npixels = vid_width * vid_height; + x264_picture_t pic; + pic.i_type = forcekey ? 
X264_TYPE_IDR : X264_TYPE_AUTO; + pic.i_pts = 0; + pic.i_qpplus1 = 0; + pic.img.i_csp = X264_CSP_I420; + pic.img.i_plane = 3; + pic.img.i_stride[0] = vid_width; + pic.img.i_stride[1] = vid_width / 2; + pic.img.i_stride[2] = vid_width / 2; + pic.img.plane[0] = const_cast(data) + npixels*0/4; + pic.img.plane[1] = const_cast(data) + npixels*4/4; + pic.img.plane[2] = const_cast(data) + npixels*5/4; + + x264_nal_t* nal; int i_nal; + x264_picture_t pic_out; + if(x264_encoder_encode(x264, &nal, &i_nal, &pic, &pic_out) < 0) + { + fprintf(stderr, "x264_encoder_encode failed\n"); + goto raw_fallback; + } + int i_size = 0; + for(int i=0; i muxbuf(i_size); + i_size = 0; + for(int i=0; i 0) + fwrite(&muxbuf[0], 1, i_size, vidfp); + } + else + { + raw_fallback: + fwrite(data, 1, bytes, vidfp); + } + + if(eventfp) + { + LockF el(eventfp); + off_t vidpos = ftello(vidfp); + off_t audpos = ftello(audfp); + fprintf(eventfp, + "%llX %llX Mark\n", + (long long)vidpos, (long long)audpos); + fflush(eventfp); + } + } + + virtual void SaveState(const std::string& slot) + { + LockF el(eventfp); + + off_t vidpos = ftello(vidfp); + off_t audpos = ftello(audfp); + + fprintf(eventfp, + "%llX %llX Save %s\n", + (long long)vidpos, (long long)audpos, slot.c_str()); + fflush(eventfp); + + FrameStates[slot] = std::make_pair(vidpos, audpos); + WriteStates(); + + forcekey = true; + } + + virtual void LoadState(const std::string& slot) + { + LockF el(eventfp); + + const std::pair& old = FrameStates[slot]; + off_t vidpos = ftello(vidfp); + off_t audpos = ftello(audfp); + fprintf(eventfp, + "%llX %llX Load %llX %llX %s\n", + (long long)vidpos, (long long)audpos, + (long long)old.first, + (long long)old.second, + slot.c_str()); + fflush(eventfp); + + forcekey = true; + } +private: + void SetFn() + { + std::string vidfn = VIDEO_CMD + ".vid"; + std::string audfn = VIDEO_CMD + ".aud"; + std::string eventfn = VIDEO_CMD + ".log"; + std::string statefn = VIDEO_CMD + ".state"; + vidfp = fopen(vidfn.c_str(), 
"ab+"); + audfp = fopen(audfn.c_str(), "ab+"); + eventfp = fopen(eventfn.c_str(), "ab+"); + statefp = fopen2(statefn.c_str(), "rb+", "wb+"); + ReadStates(); + + if(eventfp) + { + off_t vidpos = ftello(vidfp); + off_t audpos = ftello(audfp); + fprintf(eventfp, + "%llX %llX Begin\n", + (long long)vidpos, (long long)audpos); + } + } + static FILE* fopen2(const char* fn, const char* mode1, const char* mode2) + { + FILE* result = fopen(fn, mode1); + if(!result) result = fopen(fn, mode2); + return result; + } + void ReadStates() + { + LockF sl(statefp); + + char Buf[4096]; + rewind(statefp); + FrameStates.clear(); + while(fgets(Buf, sizeof(Buf), statefp)) + { + if(*Buf == '-') break; + char slotname[4096]; + long long vidpos, audpos; + strtok(Buf, "\r"); strtok(Buf, "\n"); + sscanf(Buf, "%llX %llX %4095s", &vidpos, &audpos, slotname); + FrameStates[slotname] = std::pair (vidpos, audpos); + } + } + void WriteStates() + { + LockF sl(statefp); + + rewind(statefp); + for(std::map >::const_iterator + i = FrameStates.begin(); i != FrameStates.end(); ++i) + { + fprintf(statefp, "%llX %llX %s\n", + (long long) i->second.first, + (long long) i->second.second, + i->first.c_str()); + } + fprintf(statefp, "-\n"); + fflush(statefp); + } +}; + + +static AVI* AVI = 0; + +#include "quantize.hh" +#include "rgbtorgb.hh" + +static bool RerecordingMode = false; +static long CurrentFrameNumber = 0; + +extern "C" +{ + int LoggingEnabled = 0; /* 0=no, 1=yes, 2=recording! 
*/ + + const char* NESVideoGetVideoCmd() + { + return VIDEO_CMD.c_str(); + } + void NESVideoSetVideoCmd(const char *cmd) + { +#ifdef THREAD_SAFETY + ScopedLock lock; +#endif + + VIDEO_CMD = cmd; + } + + void NESVideoSetRerecordingMode(long FrameNumber) + { + //const int LogoFramesOverlap = (int)( (LOGO_LENGTH_OVERLAP * fps_scaled) / (1 << 24) ); + RerecordingMode = true; + CurrentFrameNumber = FrameNumber; + LogoInfo::SentVideo = FrameNumber > 0; + LogoInfo::SentAudio = FrameNumber > 0; + LogoInfo::OverlapSent = FrameNumber; + } + + static class AVI& GetAVIptr() + { + if(!AVI) + { + if(RerecordingMode) + { + fprintf(stderr, "Beginning rerecording project at frame %ld\n", CurrentFrameNumber); + AVI = new RerecordingAVI(CurrentFrameNumber); + } + else + { + fprintf(stderr, "Starting new AVI (num %u)\n", videonumber); + AVI = new NormalAVI; + } + } + return *AVI; + } + + void NESVideoRerecordingSave(const char* slot) + { + GetAVIptr().SaveState(slot); + } + + void NESVideoRerecordingLoad(const char* slot) + { + GetAVIptr().LoadState(slot); + } + + void NESVideoNextAVI() + { +#ifdef THREAD_SAFETY + ScopedLock lock; +#endif + + if(AVI) + { + fprintf(stderr, "Closing AVI (next will be started)\n"); + delete AVI; + AVI = 0; + ++videonumber; + } + } + + static void Overlay32With32(unsigned char* target, const unsigned char* source, int alpha) + { + target[0] += ((int)(source[0] - target[0])) * alpha / 255; + target[1] += ((int)(source[1] - target[1])) * alpha / 255; + target[2] += ((int)(source[2] - target[2])) * alpha / 255; + } + + static void OverlayLogoFrom(const char* fn, std::vector& data) + { + FILE*fp = fopen(fn, "rb"); + if(!fp) perror(fn); + if(!fp) return; /* Silently ignore missing frames */ + + gdImagePtr im = gdImageCreateFromPng(fp); + if(!im) + { + fprintf(stderr, "'%s': Failed to open image\n", fn); + goto CloseIm; + } + if(!gdImageTrueColor(im)) + { + fprintf(stderr, "'%s': Only true color images are supported\n", fn); + goto CloseIm; + } + {/*scope 
begin*/ + + unsigned new_width = gdImageSX(im); + unsigned new_height= gdImageSY(im); + + if(new_width != LogoInfo::width + || new_height != LogoInfo::height) + { + if(new_height < LogoInfo::height || new_height > LogoInfo::height+20) + fprintf(stderr, "'%s': ERROR, expected %dx%d, got %dx%d\n", fn, + LogoInfo::width, LogoInfo::height, + new_width, new_height); + } + + for(unsigned y=0; y > files; + if(files.empty()) /* Cache the list of logo files. */ + { + static const char GlobPat[] = "logo_*_*_f*.png"; + glob_t globdata; + globdata.gl_offs = 0; + fprintf(stderr, "Loading list of usable logo animation files in %s...\n", avdir.c_str()); + int globres = glob( (avdir + GlobPat).c_str(), GLOB_NOSORT, NULL, &globdata); + if(globres == 0) + { + for(size_t n=0; n >::const_iterator + i = files.find(frameno); + if(i != files.end()) + { + std::string best; + int bestdist = -1; + + const std::vector& fnames = i->second; + for(size_t b=fnames.size(), a=0; a= 0) want = avdir + best; + } + } + return want; + } + + static const std::vector NVConvert24To16Frame + (const std::vector& logodata) + { + std::vector result(LogoInfo::width * LogoInfo::height * 2); + Convert24To16Frame(&logodata[0], &result[0], LogoInfo::width * LogoInfo::height, LogoInfo::width); + return result; + } + static const std::vector NVConvert24ToR16Frame + (const std::vector& logodata) + { + std::vector result(LogoInfo::width * LogoInfo::height * 2); + Convert24ToR16Frame(&logodata[0], &result[0], LogoInfo::width * LogoInfo::height, LogoInfo::width); + return result; + } + static const std::vector NVConvert24To15Frame + (const std::vector& logodata) + { + std::vector result(LogoInfo::width * LogoInfo::height * 2); + Convert24To15Frame(&logodata[0], &result[0], LogoInfo::width * LogoInfo::height, LogoInfo::width); + return result; + } + + static const std::vector NVConvert24To_I420Frame + (const std::vector& logodata) + { + std::vector result(LogoInfo::width * LogoInfo::height * 3 / 2); + 
Convert24To_I420Frame(&logodata[0], &result[0], LogoInfo::width * LogoInfo::height, LogoInfo::width); + return result; + } + + static const std::vector NVConvert24To_YUY2Frame + (const std::vector& logodata) + { + std::vector result(LogoInfo::width * LogoInfo::height * 3 / 2); + Convert24To_YUY2Frame(&logodata[0], &result[0], LogoInfo::width * LogoInfo::height, LogoInfo::width); + return result; + } + + static const std::vector NVConvert16To24Frame + (const void* data, unsigned npixels) + { + std::vector logodata(npixels*3); /* filled with black. */ + Convert16To24Frame(data, &logodata[0], npixels, true); + return logodata; + } + + static const std::vector NVConvertR16To24Frame + (const void* data, unsigned npixels) + { + std::vector logodata(npixels*3); /* filled with black. */ + Convert16To24Frame(data, &logodata[0], npixels, false); + return logodata; + } + + static const std::vector NVConvert15To24Frame + (const void* data, unsigned npixels) + { + std::vector logodata(npixels*3); /* filled with black. */ + Convert15To24Frame(data, &logodata[0], npixels); + return logodata; + } + + static const std::vector NVConvert_I420To24Frame + (const void* data, unsigned npixels) + { + std::vector logodata(npixels*3); /* filled with black. */ + Convert_I420To24Frame(data, &logodata[0], npixels, LogoInfo::width); + return logodata; + } + + static const std::vector NVConvert_YUY2To24Frame + (const void* data, unsigned npixels) + { + std::vector logodata(npixels*3); /* filled with black. 
*/ + Convert_YUY2To24Frame(data, &logodata[0], npixels, LogoInfo::width); + return logodata; + } + + static void SubstituteWithBlackIfNeeded(const void*& data) + { + /* If the first frames of the animation consist of a + * single color (such as gray for NES), replace them + * with black to avoid ugly backgrounds on logo animations + */ + + static bool Deviate = false; + static short* Replacement = 0; + static unsigned wid=0, hei=0; + if(Deviate) + { + if(Replacement) { delete[] Replacement; Replacement=0; } + return; + } + + unsigned dim = LogoInfo::width * LogoInfo::height; + const short* p = (const short*)data; + for(unsigned a=0; a VideoBuf; + VideoBuf.resize(width*height * 3); + + Convert32To24Frame(data, &VideoBuf[0], width*height); + data = (void*)&VideoBuf[0]; + } + + if(bpp) INPUT_BPP = bpp; + + switch(INPUT_BPP) + { + case 32: USE_FOURCC = BGR32; break; + case 24: USE_FOURCC = BGR24; break; + case 16: USE_FOURCC = BGR16; break; + case 15: USE_FOURCC = BGR15; break; + case 12: USE_FOURCC = I420; break; + case 17: USE_FOURCC = YUY2; break; + } + //USE_FOURCC = BGR24; // FIXME TEMPORARY + + const int LogoFramesHeader = (int)( (LOGO_LENGTH_HEADER * fps_scaled) / (1 << 24) ); + const int LogoFramesOverlap = (int)( (LOGO_LENGTH_OVERLAP * fps_scaled) / (1 << 24) ); + + LogoInfo::width = width; + LogoInfo::height = height; + + if(INPUT_BPP == 16 || INPUT_BPP == 15) + { + SubstituteWithBlackIfNeeded(data); + } + else if(INPUT_BPP != 24 && INPUT_BPP != 12 && INPUT_BPP != 17) + { + fprintf(stderr, "NESVIDEOS_PIECE only supports 16 and 24 bpp, you gave %u bpp\n", + bpp); + return; + } + + if(!LogoInfo::SentVideo) + { + /* Send animation frames that do not involve source video? */ + LogoInfo::SentVideo=true; + + if(LogoFramesHeader > 0) + { + for(int frame = 0; frame < LogoFramesHeader; ++frame) + { + std::vector logodata(width*height*3); /* filled with black. 
*/ + + std::string fn = GetLogoFileName(frame); + /*fprintf(stderr, "wid=%d(%d), hei=%d(%d),fn=%s\n", + width, LogoInfo::width, + height, LogoInfo::height, + fn.c_str());*/ + OverlayLogoFrom(fn.c_str(), logodata); + + //INPUT_BPP = 24; USE_FOURCC = BGR24; // FIXME TEMPORARY + + if(INPUT_BPP == 16) + { + std::vector result = NVConvert24ToR16Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else if(INPUT_BPP == 15) + { + std::vector result = NVConvert24To15Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else if(INPUT_BPP == 12) + { + std::vector result = NVConvert24To_I420Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else if(INPUT_BPP == 17) + { + std::vector result = NVConvert24To_YUY2Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else + { + GetAVIptr().Video(width,height,fps_scaled, &logodata[0]); + } + } + } + } + + if(LogoInfo::OverlapSent < LogoFramesOverlap) + { + /* Send animation frames that mix source and animation? */ + + std::string fn = GetLogoFileName(LogoInfo::OverlapSent + LogoFramesHeader); + /* + fprintf(stderr, "wid=%d(%d), hei=%d(%d),fn=%s\n", + width, LogoInfo::width, + height, LogoInfo::height, + fn.c_str());*/ + + std::vector logodata; + if(INPUT_BPP == 16) + { + logodata = NVConvertR16To24Frame(data, width*height); + } + else if(INPUT_BPP == 15) + { + logodata = NVConvert15To24Frame(data, width*height); + } + else if(INPUT_BPP == 17) + { + logodata = NVConvert_YUY2To24Frame(data, width*height); + } + else if(INPUT_BPP == 12) + { + logodata = NVConvert_I420To24Frame(data, width*height); + } + else + { + logodata.resize(width*height*3); /* filled with black. 
*/ + memcpy(&logodata[0], data, width*height*3); + } + + OverlayLogoFrom(fn.c_str(), logodata); + + if(INPUT_BPP == 16) + { + std::vector result = NVConvert24ToR16Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else if(INPUT_BPP == 15) + { + std::vector result = NVConvert24To15Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else if(INPUT_BPP == 12) + { + std::vector result = NVConvert24To_I420Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else if(INPUT_BPP == 17) + { + std::vector result = NVConvert24To_YUY2Frame(logodata); + GetAVIptr().Video(width,height,fps_scaled, &result[0]); + } + else + { + GetAVIptr().Video(width,height,fps_scaled, &logodata[0]); + } + + ++LogoInfo::OverlapSent; + return; + } + + GetAVIptr().Video(width,height,fps_scaled, (const unsigned char*) data); + } + + void NESVideoLoggingAudio + (const void*data, + unsigned rate, unsigned bits, unsigned chans, + unsigned nsamples) + { + if(LoggingEnabled < 2) return; + + ++CurrentFrameNumber; + +#ifdef THREAD_SAFETY + ScopedLock lock; +#endif + + if(!LogoInfo::SentAudio && LOGO_LENGTH_HEADER > 0) + { + LogoInfo::SentAudio=true; + + double HdrLength = LOGO_LENGTH_HEADER; // N64 workaround + + const long n = (long)(rate * HdrLength)/* + - (rate * 0.11)*/; + + if(n > 0) { + unsigned bytes = n*chans*(bits/8); + unsigned char* buf = (unsigned char*)malloc(bytes); + if(buf) + { + memset(buf,0,bytes); + GetAVIptr().Audio(rate,bits,chans, buf, n); + free(buf); + } } + } + + /* + fprintf(stderr, "Writing %u samples (%u bits, %u chans, %u rate)\n", + nsamples, bits, chans, rate);*/ + + /* + static FILE*fp = fopen("audiodump.wav", "wb"); + fwrite(data, 1, nsamples*(bits/8)*chans, fp); + fflush(fp);*/ + + GetAVIptr().Audio(rate,bits,chans, (const unsigned char*) data, nsamples); + } +} /* extern "C" */ diff -NaHudr dosbox-0.73/src/nesvideos-piece.hh dosbox-0.73-patched/src/nesvideos-piece.hh --- 
dosbox-0.73/src/nesvideos-piece.hh 1970-01-01 02:00:00.000000000 +0200 +++ dosbox-0.73-patched/src/nesvideos-piece.hh 2010-02-25 23:50:10.326593232 +0200 @@ -0,0 +1,42 @@ +#ifndef NESVPIECEhh +#define NESVPIECEhh + +#define NESVIDEOS_LOGGING 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* Is video logging enabled? 0=no, 1=yes, 2=active. Default value: 0 */ +extern int LoggingEnabled; + +/* Get and set the video recording command (shell command) */ +extern const char* NESVideoGetVideoCmd(); +extern void NESVideoSetVideoCmd(const char *cmd); + +/* Save 1 frame of video. bpp selects the pixel format of *data: 32, 24, 16 or 15 for RGB at that depth, 12 for I420, 17 for YUY2; bpp=0 leaves the previously selected format in effect. */ +/* FPS is scaled by 24 bits (*0x1000000) */ +/* Does not do anything if LoggingEnabled<2. */ +extern void NESVideoLoggingVideo + (const void*data, unsigned width, unsigned height, + unsigned fps_scaled, + unsigned bpp); + +/* Save nsamples samples of audio. rate, bits and chans describe the sample format of *data. */ +/* Does not do anything if LoggingEnabled<2. */ +/* The interval of calling this function is not important, as long as all the audio + * data is eventually written without too big delay (5 seconds is too big) + * This function may be called multiple times per video frame, or once per a few video + * frames, or anything in between. Just that all audio data must be written exactly once, + * and in order. */ +extern void NESVideoLoggingAudio + (const void*data, + unsigned rate, unsigned bits, unsigned chans, + unsigned nsamples); +/* nsamples*chans*(bits/8) = bytes in *data. 
*/ + +#ifdef __cplusplus +} +#endif + +#endif diff -NaHudr dosbox-0.73/src/quantize.hh dosbox-0.73-patched/src/quantize.hh --- dosbox-0.73/src/quantize.hh 1970-01-01 02:00:00.000000000 +0200 +++ dosbox-0.73-patched/src/quantize.hh 2008-02-20 23:44:33.773495959 +0200 @@ -0,0 +1,185 @@ +/* + Ordered dithering methods provided for: + 8x8 (Quantize8x8) + 4x4 (Quantize4x4) + 3x3 (Quantize3x3) + 4x2 (Quantize4x2) + 3x2 (Quantize3x2) + 2x2 (Quantize2x2) + The functions are: + + template + int QuantizeFunc(size_t quant_pos, double value) + + - Quantizes value, assumed to be in range 0..in_max, to range 0..m + - quant_pos tells the coordinate into the dithering matrix + + template + int QuantizeFunc(size_t quant_pos, unsigned value) + + - Quantizes value, assumed to be in range 0..in_max, to range 0..m + - quant_pos tells the coordinate into the dithering matrix + + Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/) +*/ + +#define OrderedDitherDecl(n) \ + static const double flts[n]; \ + static const int ints[n]; \ + enum { mul = n+1, \ + maxin = in_max, \ + even = !(maxin % mul), \ + intmul = even ? 1 : mul }; + +/* macroes for initializing dither tables */ +#define d(n) (n)/double(mul) - 0.5 +#define i(n) even ? 
(n*in_max/mul - (int)in_max/2) \ + : (n*in_max - (int)mul*in_max/2) + +template +struct QuantizeNoDither +{ + int res; + template + QuantizeNoDither(IntType v) : res(v * m / in_max) { } + operator int() const { return res; } +}; + +template +struct QuantizeFuncBase: private Base +{ + int res; + + QuantizeFuncBase(size_t quant_pos, double v) : res(0) + { + if(v > 0.0) + { + const double dither_threshold = Base::flts[quant_pos]; + res = (int)(v * (m / double(Base::maxin)) + dither_threshold); + if(res > m) res = m; + } + } + + QuantizeFuncBase(size_t quant_pos, unsigned char v) : res(v) + { + if(m == Base::maxin) return; + if(m < Base::maxin) + { + // With dithering + const int dither_threshold = Base::ints[quant_pos]; + const int intmul = Base::intmul; + res = (res * (m * intmul) + dither_threshold) / (Base::maxin * intmul); + } + else + { + // Without dithering + res = QuantizeNoDither (res); + } + } +}; + +#define QuantizeFuncDecl(name, base) \ + template \ + struct name: private QuantizeFuncBase > \ + { \ + typedef QuantizeFuncBase > Base; \ + template name(A a, B b) : Base(a, b) { } \ + operator int() const { return Base::res; } \ + } + +/******* Quantizing with 8x8 ordered dithering ********/ +template struct OrderedDither_8x8 { OrderedDitherDecl(8*8) }; + template + const double OrderedDither_8x8::flts[] /* A table for 8x8 ordered dithering */ + = { d(1 ), d(49), d(13), d(61), d( 4), d(52), d(16), d(64), + d(33), d(17), d(45), d(29), d(36), d(20), d(48), d(32), + d(9 ), d(57), d( 5), d(53), d(12), d(60), d( 8), d(56), + d(41), d(25), d(37), d(21), d(44), d(28), d(40), d(24), + d(3 ), d(51), d(15), d(63), d( 2), d(50), d(14), d(62), + d(35), d(19), d(47), d(31), d(34), d(18), d(46), d(30), + d(11), d(59), d( 7), d(55), d(10), d(58), d( 6), d(54), + d(43), d(27), d(39), d(23), d(42), d(26), d(38), d(22) }; + template + const int OrderedDither_8x8::ints[] + = { i(1 ), i(49), i(13), i(61), i( 4), i(52), i(16), i(64), + i(33), i(17), i(45), i(29), i(36), i(20), 
i(48), i(32), + i(9 ), i(57), i( 5), i(53), i(12), i(60), i( 8), i(56), + i(41), i(25), i(37), i(21), i(44), i(28), i(40), i(24), + i(3 ), i(51), i(15), i(63), i( 2), i(50), i(14), i(62), + i(35), i(19), i(47), i(31), i(34), i(18), i(46), i(30), + i(11), i(59), i( 7), i(55), i(10), i(58), i( 6), i(54), + i(43), i(27), i(39), i(23), i(42), i(26), i(38), i(22) }; +QuantizeFuncDecl(Quantize8x8, OrderedDither_8x8); + + +/******* Quantizing with 4x4 ordered dithering ********/ +template struct OrderedDither_4x4 { OrderedDitherDecl(4*4) }; + template + const double OrderedDither_4x4::flts[] /* A table for 4x4 ordered dithering */ + = { d( 1), d( 9), d( 3), d(11), + d(13), d( 5), d(15), d( 7), + d( 4), d(12), d( 2), d(10), + d(16), d( 8), d(14), d( 6) }; + template + const int OrderedDither_4x4::ints[] + = { i( 1), i( 9), i( 3), i(11), + i(13), i( 5), i(15), i( 7), + i( 4), i(12), i( 2), i(10), + i(16), i( 8), i(14), i( 6) }; +QuantizeFuncDecl(Quantize4x4, OrderedDither_4x4); + +/******* Quantizing with 3x3 ordered dithering ********/ +template struct OrderedDither_3x3 { OrderedDitherDecl(3*3) }; + template + const double OrderedDither_3x3::flts[] /* A table for 3x3 ordered dithering */ + = { d(1), d(7), d(3), + d(6), d(4), d(9), + d(8), d(2), d(5) }; + template + const int OrderedDither_3x3::ints[] + = { i(1), i(7), i(3), + i(6), i(4), i(9), + i(8), i(2), i(5) }; +QuantizeFuncDecl(Quantize3x3, OrderedDither_3x3); + +/******* Quantizing with 4x2 ordered dithering ********/ +template struct OrderedDither_4x2 { OrderedDitherDecl(4*2) }; + template + const double OrderedDither_4x2::flts[] /* A table for 4x2 ordered dithering */ + = { d(1), d(5), d(2), d(6), + d(7), d(3), d(8), d(4) }; + template + const int OrderedDither_4x2::ints[] + = { i(1), i(5), i(2), i(6), + i(7), i(3), i(8), i(4) }; +QuantizeFuncDecl(Quantize4x2, OrderedDither_4x2); + +/******* Quantizing with 3x2 ordered dithering ********/ +template struct OrderedDither_3x2 { OrderedDitherDecl(3*2) }; + template + 
const double OrderedDither_3x2::flts[] /* A table for 3x2 ordered dithering */ + = { d(1), d(5), d(3), + d(4), d(2), d(6) }; + template + const int OrderedDither_3x2::ints[] + = { i(1), i(5), i(3), + i(4), i(2), i(6) }; +QuantizeFuncDecl(Quantize3x2, OrderedDither_3x2); + +/******* Quantizing with 2x2 ordered dithering ********/ +template struct OrderedDither_2x2 { OrderedDitherDecl(2*2) }; + template + const double OrderedDither_2x2::flts[] /* A table for 2x2 ordered dithering */ + = { d(1), d(4), + d(3), d(2) }; + template + const int OrderedDither_2x2::ints[] + = { i(1), i(4), + i(3), i(2) }; +QuantizeFuncDecl(Quantize2x2, OrderedDither_2x2); + + +#undef OrderedDitherDecl +#undef QuantizeFuncDecl +#undef i +#undef d diff -NaHudr dosbox-0.73/src/rgbtorgb.cc dosbox-0.73-patched/src/rgbtorgb.cc --- dosbox-0.73/src/rgbtorgb.cc 1970-01-01 02:00:00.000000000 +0200 +++ dosbox-0.73-patched/src/rgbtorgb.cc 2010-02-26 02:42:25.092840724 +0200 @@ -0,0 +1,1142 @@ +#include +#include // for size_t +#include +#include + +/* RGB to RGB and RGB from/to I420 conversions written by Bisqwit + * Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/) + */ + +typedef uint_least64_t uint64_t; + +#include "quantize.hh" +#include "rgbtorgb.hh" +#include "simd.hh" + +/* For BPP conversions */ + +static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL; +static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL; +static const uint64_t mask24hh __attribute__((aligned(8))) = 0xffff000000000000ULL; +static const uint64_t mask24hhh __attribute__((aligned(8))) = 0xffffffff00000000ULL; +static const uint64_t mask24hhhh __attribute__((aligned(8))) = 0xffffffffffff0000ULL; + +static const uint64_t mask64h __attribute__((aligned(8))) = 0xFF00FF00FF00FF00ULL; +static const uint64_t mask64l __attribute__((aligned(8))) = 0x00FF00FF00FF00FFULL; +static const uint64_t mask64hw __attribute__((aligned(8))) = 0xFFFF0000FFFF0000ULL; +static const 
uint64_t mask64lw __attribute__((aligned(8))) = 0x0000FFFF0000FFFFULL; +static const uint64_t mask64hd __attribute__((aligned(8))) = 0xFFFFFFFF00000000ULL; +static const uint64_t mask64ld __attribute__((aligned(8))) = 0x00000000FFFFFFFFULL; + +/* For RGB2YUV: */ + +static const int RGB2YUV_SHIFT = 15; /* highest value where [RGB][YUV] fit in signed short */ + +static const int RY = 8414; // ((int)(( 65.738/256.0)*(1< +static inline void Convert32To24_32bytes(c64 w0, c64 w1, c64 w2, c64 w3, unsigned char* dest) +{ + c64 r0 = (w0 & mask24l) | ((w0 >> 8) & mask24h); /* bbbaaa */ + c64 r1 = (w1 & mask24l) | ((w1 >> 8) & mask24h); /* dddccc */ + c64 r2 = (w2 & mask24l) | ((w2 >> 8) & mask24h); /* fffeee */ + c64 r3 = (w3 & mask24l) | ((w3 >> 8) & mask24h); /* hhhggg */ + + /* ccbbbaaa */ + ((r0 ) | ((r1 << 48) & mask24hh)).Put(dest+0); + /* feeedddc */ + ((r1 >> 16) | ((r2 << 32) & mask24hhh)).Put(dest+8); + /* hhhgggff */ + ((r2 >> 32) | ((r3 << 16) & mask24hhhh)).Put(dest+16); +} + +#if defined(__x86_64) || defined(USE_MMX) +static void Convert32To24_32bytes(const unsigned char* src, + unsigned char* dest) +{ + c64 w0; w0.Get(src+0); + c64 w1; w1.Get(src+8); + c64 w2; w2.Get(src+16); + c64 w3; w3.Get(src+24); + Convert32To24_32bytes(w0,w1,w2,w3, dest); +} +#endif + +void Convert32To24Frame(const void* data, unsigned char* dest, unsigned npixels) +{ + const unsigned char* src = (const unsigned char*)data; + + #if defined(__x86_64) || defined(USE_MMX) + while(npixels >= 8) + { + Convert32To24_32bytes(src, dest); + src += 4*8; + dest += 3*8; + npixels -= 8; + } + #ifdef USE_MMX + MMX_clear(); + #endif + #endif + + for(unsigned pos=0; pos +struct Bits16const +{ + static const uint64_t static_value = + (( ((uint64_t)(unsigned short) basevalue_lo) << 0) + | ( ((uint64_t)(unsigned short) basevalue_hi) << 16) + | ( ((uint64_t)(unsigned short) basevalue_lo) << 32) + | ( ((uint64_t)(unsigned short) basevalue_hi) << 48)); + static const uint64_t value; +}; +template +const 
uint64_t Bits16const::value = + Bits16const::static_value; + +template +struct Bits32const +{ + static const uint64_t static_value = + (( ((uint64_t)(unsigned int) basevalue_lo) << 0) + | ( ((uint64_t)(unsigned int) basevalue_hi) << 32)); + static const uint64_t value = static_value; +};/* +template +const uint64_t Bits32const::value = + Bits32const::static_value;*/ + +template +struct Bits8const +{ + static const uint64_t static_value = + ((basevalue_lo << 0) + | (basevalue_hi << 8) + | (basevalue_lo << 16) + | (basevalue_hi << 24) + | (basevalue_lo << 32) + | (basevalue_hi << 40) + | (basevalue_lo << 48) + | (basevalue_hi << 56)); + static const uint64_t value = static_value; +}; + + +template +struct MaskBconst +{ + static const uint64_t basevalue_lo = (1 << lowbitcount) - 1; + static const uint64_t basevalue_hi = (1 << highbitcount) - 1; + static const uint64_t value = Bits8const::value << leftshift; +}; + +template +struct Convert_2byte_consts +{ + static const uint64_t mask_lo;// = MaskBconst::value; + static const uint64_t mask_hi;// = MaskBconst::value; + static const uint64_t mask_frac;// = MaskBconst<8-bits,8-bits, 0>::value; +}; +template +const uint64_t Convert_2byte_consts::mask_lo = MaskBconst::value; +template +const uint64_t Convert_2byte_consts::mask_hi = MaskBconst::value; +template +const uint64_t Convert_2byte_consts::mask_frac = MaskBconst<8-bits, 8-bits, 0>::value; + +template +struct Convert_2byte_helper +{ + c64 lo, hi; + + Convert_2byte_helper(c64 p4a, c64 p4b) + { + const uint64_t& mask_lo = Convert_2byte_consts::mask_lo; + const uint64_t& mask_hi = Convert_2byte_consts::mask_hi; + const uint64_t& mask_frac = Convert_2byte_consts::mask_frac; + + /* STEP 1: SEPARATE THE PIXELS INTO RED, GREEN AND BLUE COMPONENTS */ + + /* 000BBBBB 000bbbbb 000BBBBB 000bbbbb 000BBBBB 000bbbbb 000BBBBB 000bbbbb */ + c64 s5 = ((p4a >> offs) & mask_lo) | ((p4b << (8-offs)) & mask_hi); + + /* STEP 2: SCALE THE COLOR COMPONENTS TO 256 RANGE */ + + /* BBBBB000 
bbbbb000 BBBBB000 bbbbb000 BBBBB000 bbbbb000 BBBBB000 bbbbb000 */ + /* 00000BBB 00000bbb 00000BBB 00000bbb 00000BBB 00000bbb 00000BBB 00000bbb */ + c64 v8 = (s5 << (8-bits)) | ((s5 >> (bits-(8-bits))) & mask_frac); + /* v8: + * + * BBBBBBBB bbbbbbbb BBBBBBBB bbbbbbbb BBBBBBBB bbbbbbbb BBBBBBBB bbbbbbbb * + */ + + /* STEP 3: DEINTERLACE THE PIXELS */ + lo = (v8 ) & mask64l; + hi = (v8 >> 8) & mask64l; + } +}; + +/* +template +static void Convert_2byte_to_24Common(const unsigned char* src, unsigned char* dest) + __attribute((noinline)); +*/ +template +static void Convert_2byte_to_24or32Common(const unsigned char* src, unsigned char* dest) +{ + c64 p4a; p4a.Get(src+0); // four pixels + c64 p4b; p4b.Get(src+8); // another four pixels + + /* in: In both registers: */ + + Convert_2byte_helper r(p4a,p4b); + Convert_2byte_helper b(p4a,p4b); + Convert_2byte_helper g(p4a,p4b); + + /* STEP 4: CONVERT PIXELS INTO RGB32 */ + + /* Now we have: + * b.lo = 0j0g0d0a + * g.lo = 0k0h0e0b + * r.lo = 0l0i0f0c + * b.hi = 0J0G0D0A + * g.hi = 0K0H0E0B + * r.hi = 0L0I0F0C + * We want: + * w1 = 0fed0cba + * w2 = 0lkj0ihg + * w3 = 0FED0CBA + * w4 = 0LKJ0IHG + */ + +#if 0 && defined(__MMX__) /* FIXME why is this 0&&? */ + // punpcklbw 0k0h0e0b, 0j0g0d0a -> 00ed00ba + // punpcklwd 0l0i0f0c, ________ -> 0f__0c__ + c64 w1 = r.lo.unpacklwd(0) | g.lo.unpacklbw(b.lo); // pix 0,1 + // punpckhbw 0k0h0e0b, 0j0g0d0a -> 00kj00hg + // punpckhwd 0l0i0f0c, ________ -> 0l__0i__ + c64 w2 = r.lo.unpackhwd(0) | g.lo.unpackhbw(b.lo); // pix 2,3 + + c64 w3 = r.hi.unpacklwd(0) | g.hi.unpacklbw(b.hi); // pix 4,5 + c64 w4 = r.hi.unpackhwd(0) | g.hi.unpackhbw(b.hi); // pix 6,7 + #ifndef USE_MMX + MMX_clear(); + #endif +#else + /* With 64-bit registers, this code is greatly simpler than + * the emulation of unpack opcodes. However, when the + * unpack opcodes is available, using them is shorter. + * Which way is faster? 
FIXME: Find out + */ + + // mask64lw: 00**00** + // mask64hw: **00**00 + // b.lo & mask64lw: 000g000a + // g.lo & mask64lw: 000h000b + // r.lo & mask64lw: 000i000c + // b.lo & mask64hw: 0j000d00 + // g.lo & mask64hw: 0k000e00 + // r.lo & mask64hw: 0l000f00 + + c64 tlo1 = ((b.lo & mask64lw) ) | ((g.lo & mask64lw) << 8) | ((r.lo & mask64lw) << 16); + c64 tlo2 = ((b.lo & mask64hw) >>16) | ((g.lo & mask64hw) >> 8) | ((r.lo & mask64hw) ); + + c64 thi1 = ((b.hi & mask64lw) ) | ((g.hi & mask64lw) << 8) | ((r.hi & mask64lw) << 16); + c64 thi2 = ((b.hi & mask64hw) >>16) | ((g.hi & mask64hw) >> 8) | ((r.hi & mask64hw) ); + /* + * tlo1 = 0ihg0cba + * tlo2 = 0lkj0fed + * thi1 = 0IHG0CBA + * thi2 = 0LKJ0FED + * mask64ld = 0000**** + * mask64hd = ****0000 + */ + + c64 w1 = (tlo1 & mask64ld) | ((tlo2 & mask64ld) << 32); // 00000cba | 00000fed = 0fed0bca + c64 w2 = (tlo2 & mask64hd) | ((tlo1 & mask64hd) >> 32); // 0lkj0000 | 0ihg0000 = 0lkj0ihg + + c64 w3 = (thi1 & mask64ld) | ((thi2 & mask64ld) << 32); + c64 w4 = (thi2 & mask64hd) | ((thi1 & mask64hd) >> 32); +#endif + + if(rgb24) + { + /* STEP 5A: CONVERT PIXELS INTO RGB24 */ + Convert32To24_32bytes(w1,w2,w3,w4, dest); + } + else + { + /* STEP 5B: STORE RGB32 */ + w1.Put(dest+0); + w2.Put(dest+8); + w3.Put(dest+16); + w4.Put(dest+24); + } + + /* + punpcklbw ____ABCD, ____abcd = AaBbCcDd + punpcklwd ____ABCD, ____abcd = ABabCDcd + punpckldq ____ABCD, ____abcd = ABCDabcd + + punpckhbw ABCD____, abcd____ = AaBbCcDd + punpckhwd ABCD____, abcd____ = ABabCDcd + punpckhdq ABCD____, abcd____ = ABCDabcd + */ +} + +void Convert15To24Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue) +{ + const unsigned char* src = (const unsigned char*)data; + + if(swap_red_blue) + for(; npixels >= 8; src += 8*2, dest += 8*3, npixels -= 8) + Convert_2byte_to_24or32Common<0,5, 5,5, 10,5, true> (src, dest); + else + for(; npixels >= 8; src += 8*2, dest += 8*3, npixels -= 8) + Convert_2byte_to_24or32Common<10,5, 5,5, 0,5, 
true> (src, dest); + + #ifdef USE_MMX + MMX_clear(); + #endif + for(unsigned a=0; a= 8; src += 8*2, dest += 8*3, npixels -= 8) + Convert_2byte_to_24or32Common<0,5, 5,6, 11,5, true> (src, dest); + else + for(; npixels >= 8; src += 8*2, dest += 8*3, npixels -= 8) + Convert_2byte_to_24or32Common<11,5, 5,6, 0,5, true> (src, dest); + + #ifdef USE_MMX + MMX_clear(); + #endif + for(unsigned a=0; a= 8; src += 8*2, dest += 8*4, npixels -= 8) + Convert_2byte_to_24or32Common<0,5, 5,5, 10,5, false> (src, dest); + else + for(; npixels >= 8; src += 8*2, dest += 8*4, npixels -= 8) + Convert_2byte_to_24or32Common<10,5, 5,5, 0,5, false> (src, dest); + + #ifdef USE_MMX + MMX_clear(); + #endif + for(unsigned a=0; a= 8; src += 8*2, dest += 8*4, npixels -= 8) + Convert_2byte_to_24or32Common<0,5, 5,6, 11,5, false> (src, dest); + else + for(; npixels >= 8; src += 8*2, dest += 8*4, npixels -= 8) + Convert_2byte_to_24or32Common<11,5, 5,6, 0,5, false> (src, dest); + + #ifdef USE_MMX + MMX_clear(); + #endif + for(unsigned a=0; a(o16, rgbdata[2]) << 0) + | (Quantize4x4<63>(o16, rgbdata[1]) << 5) + | (Quantize4x4<31>(o16, rgbdata[0]) << 11); +#else + return (QuantizeNoDither<31>(rgbdata[2]) << 0) + | (QuantizeNoDither<63>(rgbdata[1]) << 5) + | (QuantizeNoDither<31>(rgbdata[0]) << 11); +#endif +} +static inline unsigned BuildR16(unsigned x,unsigned y, const unsigned char* rgbdata) +{ +#if 0 + unsigned o16 = (x + 4*y) % 16; + return (Quantize4x4<31>(o16, rgbdata[0]) << 0) + | (Quantize4x4<63>(o16, rgbdata[1]) << 5) + | (Quantize4x4<31>(o16, rgbdata[2]) << 11); +#else + return (QuantizeNoDither<31>(rgbdata[0]) << 0) + | (QuantizeNoDither<63>(rgbdata[1]) << 5) + | (QuantizeNoDither<31>(rgbdata[2]) << 11); +#endif +} +static inline unsigned Build15(unsigned x,unsigned y, const unsigned char* rgbdata) +{ + unsigned o16 = (x + 4*y) % 16; + return (Quantize4x4<31>(o16, rgbdata[2]) << 0) + | (Quantize4x4<31>(o16, rgbdata[1]) << 5) + | (Quantize4x4<31>(o16, rgbdata[0]) << 10); +} + +void 
Convert24To16Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + const unsigned char* logodata = (const unsigned char*) data; + unsigned short* result = (unsigned short*) dest; + unsigned x=0,y=0; + for(unsigned pos=0; pos= width) { x=0; ++y; } + } +} + +void Convert24ToR16Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + const unsigned char* logodata = (const unsigned char*) data; + unsigned short* result = (unsigned short*) dest; + unsigned x=0,y=0; + for(unsigned pos=0; pos= width) { x=0; ++y; } + } +} + +void Convert24To15Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + const unsigned char* logodata = (const unsigned char*) data; + unsigned short* result = (unsigned short*) dest; + unsigned x=0,y=0; + for(unsigned pos=0; pos= width) { x=0; ++y; } + } +} + +#ifdef __MMX__ +static inline void Convert_I420_MMX_Common + (c64_MMX p0_1, c64_MMX p2_3, + unsigned char* dest_y0, + unsigned char* dest_y1, + unsigned char* dest_u, + unsigned char* dest_v) +{ + c64_MMX p0 = c64_MMX(0).unpacklbw(p0_1); // expand to 64-bit (4*16) + c64_MMX p1 = c64_MMX(0).unpackhbw(p0_1); + c64_MMX p2 = c64_MMX(0).unpacklbw(p2_3); + c64_MMX p3 = c64_MMX(0).unpackhbw(p2_3); + + c64_MMX ry_gy_by; ry_gy_by.Init16(RY,GY,BY, 0); + c64_MMX rgb_u; rgb_u.Init16(RU,GU,BU, 0); + c64_MMX rgb_v; rgb_v.Init16(RV,GV,BV, 0); + + c64_MMX ctotal = p0.add16( + p2.add16( + p1.add16( + p3))); + + p0 = _mm_madd_pi16(ry_gy_by.value, p0.value); + p1 = _mm_madd_pi16(ry_gy_by.value, p1.value); + p2 = _mm_madd_pi16(ry_gy_by.value, p2.value); + p3 = _mm_madd_pi16(ry_gy_by.value, p3.value); + + c64_MMX yy; + yy.Init16( ((p0.Extract32<0>() + p0.Extract32<1>()) >> (RGB2YUV_SHIFT)), + ((p1.Extract32<0>() + p1.Extract32<1>()) >> (RGB2YUV_SHIFT)), + ((p2.Extract32<0>() + p2.Extract32<1>()) >> (RGB2YUV_SHIFT)), + ((p3.Extract32<0>() + p3.Extract32<1>()) >> (RGB2YUV_SHIFT)) ); + yy = yy.add16( Bits16const::value ); + + // 
Because we're writing to adjacent pixels, we optimize this by + // writing two 8-bit values at once in both cases. + *(short*)dest_y0 = yy.Extract88_from_1616lo(); + *(short*)dest_y1 = yy.Extract88_from_1616hi(); + + c64_MMX u_total32 = _mm_madd_pi16(rgb_u.value, ctotal.value); + c64_MMX v_total32 = _mm_madd_pi16(rgb_v.value, ctotal.value); + + *dest_u = U_ADD + ((u_total32.Extract32<0>() + u_total32.Extract32<1>()) >> (RGB2YUV_SHIFT+2)); + *dest_v = V_ADD + ((v_total32.Extract32<0>() + v_total32.Extract32<1>()) >> (RGB2YUV_SHIFT+2)); +} + +static inline void Convert_YUY2_MMX_Common + (c64_MMX p0_1, c64_MMX p2_3, + unsigned char* dest_yvyu) +{ + c64_MMX p0 = c64_MMX(0).unpacklbw(p0_1); // expand to 64-bit (4*16) + c64_MMX p1 = c64_MMX(0).unpackhbw(p0_1); + c64_MMX p2 = c64_MMX(0).unpacklbw(p2_3); // expand to 64-bit (4*16) + c64_MMX p3 = c64_MMX(0).unpackhbw(p2_3); + + c64_MMX ry_gy_by; ry_gy_by.Init16(RY,GY,BY, 0); + c64_MMX rgb_u; rgb_u.Init16(RU,GU,BU, 0); + c64_MMX rgb_v; rgb_v.Init16(RV,GV,BV, 0); + + c64_MMX ctotal0 = p0.add16(p1); + c64_MMX ctotal2 = p2.add16(p3); + + p0 = _mm_madd_pi16(ry_gy_by.value, p0.value); + p1 = _mm_madd_pi16(ry_gy_by.value, p1.value); + p2 = _mm_madd_pi16(ry_gy_by.value, p2.value); + p3 = _mm_madd_pi16(ry_gy_by.value, p3.value); + + c64_MMX yy; + yy.Init16( ((p0.Extract32<0>() + p0.Extract32<1>()) >> (RGB2YUV_SHIFT)), + ((p1.Extract32<0>() + p1.Extract32<1>()) >> (RGB2YUV_SHIFT)), + ((p2.Extract32<0>() + p2.Extract32<1>()) >> (RGB2YUV_SHIFT)), + ((p3.Extract32<0>() + p3.Extract32<1>()) >> (RGB2YUV_SHIFT)) ); + + yy = yy.add16( Bits16const::value ); + + c64_MMX u_total32_0 = _mm_madd_pi16(rgb_u.value, ctotal0.value); + c64_MMX v_total32_0 = _mm_madd_pi16(rgb_v.value, ctotal0.value); + c64_MMX u_total32_2 = _mm_madd_pi16(rgb_u.value, ctotal2.value); + c64_MMX v_total32_2 = _mm_madd_pi16(rgb_v.value, ctotal2.value); + + c64_MMX quadword = yy; // four y values: at 0, 2, 4 and 6 + + c64_MMX uv; uv.Init16( + ((v_total32_0.Extract32<0>() + 
v_total32_0.Extract32<1>()) >> (RGB2YUV_SHIFT+1)), + ((u_total32_0.Extract32<0>() + u_total32_0.Extract32<1>()) >> (RGB2YUV_SHIFT+1)), + ((v_total32_2.Extract32<0>() + v_total32_2.Extract32<1>()) >> (RGB2YUV_SHIFT+1)), + ((u_total32_2.Extract32<0>() + u_total32_2.Extract32<1>()) >> (RGB2YUV_SHIFT+1)) ); + c64_MMX uv_adds; uv_adds.Init16(V_ADD, U_ADD, V_ADD, U_ADD); + uv = uv.add16(uv_adds); + + quadword |= uv << 8; // two u and v values: at 1, 3, 5 and 7. + quadword.Put(dest_yvyu); // write four y values: at 0, 2, 4 and 6 +} +#endif + +/*template +void Convert_4byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) + __attribute__((noinline));*/ + +template +void Convert_4byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + const unsigned char* src = (const unsigned char*) data; + unsigned height = npixels / width; + + unsigned pos = 0; + unsigned ypos = 0; + unsigned vpos = npixels; + unsigned upos = vpos + npixels / 4; + unsigned stride = width*PixStride; + + /*fprintf(stderr, "npixels=%u, width=%u, height=%u, ypos=%u,upos=%u,vpos=%u", + npixels,width,height, ypos,upos,vpos);*/ + + /* This function is based on code from x264 svn version 711 */ + /* TODO: Apply MMX optimization for 24-bit pixels */ + + for(unsigned y=0; y> RGB2YUV_SHIFT); // y + } + + dest[upos++] = (U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+2)) ); + dest[vpos++] = (V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+2)) ); + } + + ypos += 2; + } + pos += stride; + ypos += width; + } + + /*fprintf(stderr, ",yr=%u,ur=%u,vr=%u\n", + ypos,upos,vpos);*/ + + #ifdef __MMX__ + MMX_clear(); + #endif +} + +template +void Convert_4byte_To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + const unsigned char* src = (const unsigned char*) data; + unsigned height = npixels / width; + unsigned pos = 0; + unsigned ypos = 0; + unsigned stride = width*PixStride; + + 
/* This function is based on code from x264 svn version 711 */ + /* TODO: Apply MMX optimization for 24-bit pixels */ + + for(unsigned y=0; y> RGB2YUV_SHIFT); // y + } + + dest[ypos+3] = (U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+1)) ); + dest[ypos+1] = (V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+1)) ); + } + ypos += 4; + } + } + #ifdef __MMX__ + MMX_clear(); + #endif +} + +/*template +void Convert_2byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) + __attribute__((noinline));*/ + +template +void Convert_2byte_To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + const unsigned PixStride = 2; + const unsigned char* src = (const unsigned char*) data; + unsigned height = npixels / width; + unsigned pos = 0; + unsigned ypos = 0; + unsigned vpos = npixels; + unsigned upos = vpos + npixels / 4; + unsigned stride = width*PixStride; + + /* This function is based on code from x264 svn version 711 */ + + for(unsigned y=0; y + (src+pos, Rgb2byteBuf[0][0]); + + Convert_2byte_to_24or32Common + + (src+pos+stride, Rgb2byteBuf[1][0]); + + pos += 16; + + for(int x8 = 0; x8 < 8; x8 += 2) + { + #ifdef _q_MMX__ + c64_MMX p0_1; p0_1.Get(&Rgb2byteBuf[0][x8][0]); // two 32-bit pixels (4*8) + c64_MMX p2_3; p2_3.Get(&Rgb2byteBuf[1][x8][0]); // two 32-bit pixels + + Convert_I420_MMX_Common(p0_1, p2_3, + dest+ypos, + dest+ypos+width, + dest+upos++, + dest+vpos++); + #else + int c[3]; + /* TODO: Some faster means than using pointers */ + unsigned char* rgb[4] = + { + Rgb2byteBuf[0][x8+0], + Rgb2byteBuf[0][x8+1], + Rgb2byteBuf[1][x8+0], + Rgb2byteBuf[1][x8+1] + }; + + for(int m=0; m<3; ++m) c[m] = 0; + for(int n=0; n<4; ++n) + for(int m=0; m<3; ++m) + c[m] += rgb[n][m]; + + unsigned destpos[4] = { ypos, ypos+1, ypos+width, ypos+width+1 }; + for(int n=0; n<4; ++n) + { + dest[destpos[n]] + = Y_ADD + ((RY * rgb[n][0] + + GY * rgb[n][1] + + BY * rgb[n][2] + ) >> RGB2YUV_SHIFT); 
// y + } + + /*c[0] /= 4; c[1] /= 4; c[2] /= 4;*/ + // Note: +2 is because c[] contains 4 values + dest[upos++] = U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+2)); + dest[vpos++] = V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+2)); + #endif + ypos += 2; + } + } + pos += stride; + ypos += width; + } + + #ifdef __MMX__ + MMX_clear(); + #endif +} +/* Convert_2byte_To_YUY2Frame: converts 2-byte pixels into a packed frame with four output bytes per horizontal pixel pair. Sixteen source bytes at a time are expanded into Rgb2byteBuf; the __MMX__ branch then hands four pixels per step to Convert_YUY2_MMX_Common, while the scalar branch writes two luma bytes (offsets +0 and +2) and two chroma bytes (+1 from V_ADD, +3 from U_ADD) per pixel pair. NOTE(review): the parameter list after 'template' is missing from this copy of the patch. */ +template +void Convert_2byte_To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + const unsigned PixStride = 2; + const unsigned char* src = (const unsigned char*) data; + unsigned height = npixels / width; + unsigned pos = 0; + unsigned ypos = 0; + unsigned stride = width*PixStride; + /* NOTE(review): part of the next statement is missing in this copy (the loop conditions and the template arguments of the Convert_2byte_to_24or32Common call). */ + for(unsigned y=0; y + (src+pos, Rgb2byteBuf[0]); + + pos += 16; + + for(int x8 = 0; x8 < 8; ) + { + #ifdef __MMX__ + c64_MMX p0_1; p0_1.Get(&Rgb2byteBuf[x8 ][0]); // two 32-bit pixels (4*8) + c64_MMX p2_3; p2_3.Get(&Rgb2byteBuf[x8+2][0]); // two 32-bit pixels (4*8) + Convert_YUY2_MMX_Common(p0_1, p2_3, dest+ypos); + x8 += 4; + ypos += 8; + #else + int c[3]; + /* TODO: Some faster means than using pointers */ + unsigned char* rgb[2] = + { + Rgb2byteBuf[x8+0], + Rgb2byteBuf[x8+1], + }; + + for(int m=0; m<3; ++m) c[m] = 0; + for(int n=0; n<2; ++n) + for(int m=0; m<3; ++m) + c[m] += rgb[n][m]; + + for(int n=0; n<2; ++n) + { + dest[ypos + n*2] + = Y_ADD + ((RY * rgb[n][0] + + GY * rgb[n][1] + + BY * rgb[n][2] + ) >> RGB2YUV_SHIFT); // y + } + + /*c[0] /= 4; c[1] /= 4; c[2] /= 4;*/ + // Note: +1 is because c[] contains 2 values + dest[ypos+3] = U_ADD + ((RU * c[0] + GU * c[1] + BU * c[2]) >> (RGB2YUV_SHIFT+1)); + dest[ypos+1] = V_ADD + ((RV * c[0] + GV * c[1] + BV * c[2]) >> (RGB2YUV_SHIFT+1)); + x8 += 2; + ypos += 4; + #endif + } + } + } + + #ifdef __MMX__ + MMX_clear(); + #endif +} + + +/***/ +/* Convert_I420To24Frame: converts a planar frame (luma from offset 0 of data, chroma planes at offsets npixels and npixels + npixels/4) into 3 bytes per pixel in dest. Both code paths read luma from two adjacent rows (offsets ypos and ypos+width) and share each chroma sample across a 2x2 pixel group. NOTE(review): swap_red_blue is not referenced in the parts of the body visible in this copy. */ +void Convert_I420To24Frame(const void* data, unsigned char* dest, + unsigned npixels, unsigned width, bool swap_red_blue) +{ + const unsigned char* src = (const unsigned char*) 
data; + unsigned height = npixels / width; + unsigned pos = 0; + unsigned ypos = 0; + unsigned vpos = npixels; + unsigned upos = vpos + npixels / 4; + + /*fprintf(stderr, "npixels=%u, width=%u, height=%u, ypos=%u,upos=%u,vpos=%u\n", + npixels,width,height, ypos,upos,vpos);*/ + + #ifdef __MMX__ + c64_MMX rgb[4], yy[4]; + static const c64_MMX vmul/*; vmul.Init16*/(VR, VG, 0, 0); // R,G,B,0 * vmul = V + static const c64_MMX umul/*; umul.Init16*/(0, UG, UB, 0); // R,G,B,0 * umul = U + #endif + + /* + Y input: 16..235 + U input: 16..240 + V input: 16..240 + + */ + /* NOTE(review): the loop below advances pos, ypos, upos and vpos, which are declared outside of it, so its iterations appear to depend on each other; verify that it is safe under "omp parallel for" (the pragma only has an effect in OpenMP-enabled builds). Part of the for statement, the start of its body and the template arguments of Bits16const are missing in this copy of the patch. */ + #pragma omp parallel for + for(unsigned y=0; y::value) + .shl16(16 - YUV2RGB_SHIFT); // shift them so that *13bitconst results in upper 16 bits having the actual value + c64_MMX vvq = c64_MMX(0) + .unpacklbw(tmp_v) + .sub16(Bits16const::value) + .shl16(16 - YUV2RGB_SHIFT); // shift them so that *13bitconst results in upper 16 bits having the actual value + + const short* uu = (const short*)&uuq; + const short* vv = (const short*)&vvq; + + /* c64_MMX rgb[4]; // four sets of 4*int16, each representing 1 rgb value */ + for(int n=0; n<4; ++n) + { + /* vv is shifted by 3 bits, vmul is shifted by 13 bits + * 16 bits in total, so mul16hi gets the 16-bit downscaled part */ + c64_MMX v; v.Init16(vv[n]); + c64_MMX u; u.Init16(uu[n]); + rgb[n] = v.mul16hi(vmul).add16( + u.mul16hi(umul) ); + } + + /* rgb[0] : U,V increment of RGB32 for x0,y0 - x1,y1 + * rgb[1] : U,V increment of RGB32 for x2,y0 - x3,y1 + * rgb[2] : U,V increment of RGB32 for x4,y0 - x5,y1 + * rgb[3] : U,V increment of RGB32 for x6,y0 - x7,y1 + */ + + unsigned yyoffs[4] = { ypos, ypos+1, ypos+width, ypos+width+1 }; + /* c64_MMX yy[4]; // four sets of 4*int16, each representing four Y values */ + for(int n=0; n<4; ++n) + { + c64_MMX luma; luma.Init16( + src[yyoffs[0]+n*2], /* n(0..3): x0y0,x2y0,x4y0,x6y0 */ + src[yyoffs[1]+n*2], /* n(0..3): x1y0,x3y0,x5y0,x7y0 */ + src[yyoffs[2]+n*2], /* n(0..3): x0y1,x2y1,x4y1,x6y1 */ + src[yyoffs[3]+n*2] /* n(0..3): 
x1y1,x3y1,x5y1,x7y1 */ + ); + luma = luma.sub16(Bits16const::value); + luma = luma.shl16(16 - YUV2RGB_SHIFT); + yy[n] = luma.mul16hi(Bits16const::value); + } + const short* const yyval = (const short*) &yy[0].value; + /* yyval views yy[0..3] as 16 consecutive shorts (assumes the c64_MMX array elements are stored back to back, 8 bytes each); + values in order: + x0y0 x1y0 x0y1 x1y1 + x2y0 x3y0 x2y1 x3y1 + x4y0 x5y0 x4y1 x5y1 + x6y0 x7y0 x6y1 x7y1 + */ + int tmppos = pos; + for(int ny = 0; ny < 4; ny += 2) + { + /* Note: We must use 16-bit pixels here instead of 8-bit, + * because the rgb+Y addition can overflow. conv_s16_u8() + * does the necessary clamping, which would not be done + * if the values were 8-bit. + */ + // 8 pixels for one scanline, repeated twice + /* Note: C++ has no named constructors, so we + * use statement blocks here as substitutes. + */ + c64_MMX r0 + = rgb[0].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+0]); tmp; }) ) + .conv_s16_u8( + rgb[0].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+1]); tmp; }) )); + c64_MMX r1 + = rgb[1].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+4]); tmp; }) ) + .conv_s16_u8( + rgb[1].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+5]); tmp; }) )); + c64_MMX r2 + = rgb[2].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+8]); tmp; }) ) + .conv_s16_u8( + rgb[2].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+9]); tmp; }) )); + c64_MMX r3 + = rgb[3].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+12]); tmp; }) ) + .conv_s16_u8( + rgb[3].add16( ({ c64_MMX tmp; tmp.Init16(yyval[ny+13]); tmp; }) )); + + Convert32To24_32bytes(r0,r1,r2,r3, &dest[tmppos]); + tmppos += width*3; // next line + } + upos += 4; + vpos += 4; + ypos += 8; // eight bytes for this line (and eight from next too) + pos += 8*3; // eight triplets generated on this line + x += 8; // eight yy values used on this line + #else /* non-MMX: one U/V sample gives an RGB offset that is shared by the four pixels of a 2x2 group (offsets in incr[]) */ + int u = src[upos] - U_ADD; + int v = src[vpos] - V_ADD; + + int rgb[3] = + { + (VR * v ) >> (YUV2RGB_SHIFT), + (VG * v + UG * u) >> (YUV2RGB_SHIFT), + ( + UB * u) >> (YUV2RGB_SHIFT) + }; + + unsigned incr[4] = {0,1,width,width+1}; + + for(unsigned r=0; r<4; ++r) + 
for(unsigned doffs=pos + incr[r]*3, yoffs=ypos + incr[r], + yy = (Y_REV * (src[yoffs] - Y_ADD)) >> YUV2RGB_SHIFT, + n=0; n<3; ++n) + dest[doffs+n] = c64::clamp_u8(rgb[n] + (int)yy); /* yy shares the unsigned declaration of the for-init, hence the cast back to int */ + + upos += 1; + vpos += 1; + ypos += 2; // two bytes for this line (two from next line) + pos += 2*3; // two triplets generated on this line + x += 2; // two yy values used on this line + #endif + } + ypos += width; + pos += 3*width; + } + #ifdef __MMX__ + MMX_clear(); + #endif +} +/* Convert_YUY2To24Frame: converts a packed frame with four source bytes per horizontal pixel pair into 3 bytes per pixel in dest; luma is read from offsets +0 and +2 of each group and both pixels of the pair share one RGB offset derived from u and v. NOTE(review): swap_red_blue is not referenced in the parts of the body visible in this copy. */ +void Convert_YUY2To24Frame(const void* data, unsigned char* dest, + unsigned npixels, unsigned width, bool swap_red_blue) +{ + const unsigned char* src = (const unsigned char*) data; + unsigned height = npixels / width; + unsigned pos = 0; + unsigned ypos = 0; + + /* TODO: MMX optimization */ + + /* + Y input: 16..235 + U input: 16..240 + V input: 16..240 + NOTE(review): pos and ypos are declared outside the loop below and advanced inside it, so check that "omp parallel for" is safe here; part of the for statement and the start of its body are missing in this copy of the patch. + */ + #pragma omp parallel for + for(unsigned y=0; y> (YUV2RGB_SHIFT), + (VG * v + UG * u) >> (YUV2RGB_SHIFT), + ( + UB * u) >> (YUV2RGB_SHIFT) + }; + + for(unsigned r=0; r<2; ++r) + for(unsigned doffs=pos + r*3, yoffs=ypos+r*2, + yy = (Y_REV * (src[yoffs] - Y_ADD)) >> YUV2RGB_SHIFT, + n=0; n<3; ++n) + dest[doffs+n] = c64::clamp_u8(rgb[n] + (int)yy); + + ypos += 4; // four bytes for this line (y,u,y,v) + pos += 2*3; // two triplets generated on this line + x += 2; // two yy values used on this line + } + } +} +/* Non-template entry points: each forwards to one fixed instantiation of the converter templates (<3> or <4> for 24/32-bit input, six integers for 15/16-bit input - presumably the shift and bit width of the R, G and B fields; confirm against Convert_2byte_to_24or32Common). */ +/***/ +void Convert24To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_4byte_To_I420Frame<3>(data,dest,npixels,width); +} +void Convert32To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_4byte_To_I420Frame<4>(data,dest,npixels,width); +} +void Convert15To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_2byte_To_I420Frame<10,5, 5,5, 0,5>(data,dest,npixels,width); +} +void Convert16To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_2byte_To_I420Frame<11,5, 
5,6, 0,5>(data,dest,npixels,width); +} +/***/ +void Convert24To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_4byte_To_YUY2Frame<3>(data,dest,npixels,width); +} +void Convert32To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_4byte_To_YUY2Frame<4>(data,dest,npixels,width); +} +void Convert15To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_2byte_To_YUY2Frame<10,5, 5,5, 0,5>(data,dest,npixels,width); +} +void Convert16To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width) +{ + Convert_2byte_To_YUY2Frame<11,5, 5,6, 0,5>(data,dest,npixels,width); +} diff -NaHudr dosbox-0.73/src/rgbtorgb.hh dosbox-0.73-patched/src/rgbtorgb.hh --- dosbox-0.73/src/rgbtorgb.hh 1970-01-01 02:00:00.000000000 +0200 +++ dosbox-0.73-patched/src/rgbtorgb.hh 2010-02-26 01:36:14.736589962 +0200 @@ -0,0 +1,69 @@ +#ifdef __cplusplus +extern "C" { + #define defaulttrue =true +#else + #define defaulttrue + #define bool int +#endif +/* defaulttrue expands to "=true" in C++, giving swap_red_blue a default argument in the declarations below, and to nothing in C, where bool is temporarily defined as int; both macros are undefined again at the end of this header. */ +/* RGB to RGB and RGB from/to YCbCr (YUV) conversions written by Bisqwit + * Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/) + * + * Concepts: + * 15 = RGB15 or BGR15 + * 16 = RGB16 or BGR16 + * 24 = RGB24 or BGR24 + * 32 = RGB32 or BGR32 + * I420 = YCbCr where Y is issued for each pixel, + * followed by Cr for 2x2 pixels, + * followed by Cb for 2x2 pixels + * YUY2 = YCbCr where for each pixel, Y is issued, + * followed by Cr for 2x1 pixels (if even pixel) + * or Cb for 2x1 pixels (if odd pixel) + * + * Note: Not all functions honor the swap_red_blue setting. 
+ */ +/* Every converter reads the frame from data and writes it to dest; npixels is the pixel count of the whole frame. The definitions of the I420/YUY2 converters derive the height as npixels / width. */ +void Convert32To24Frame(const void* data, unsigned char* dest, unsigned npixels) + __attribute__((noinline)); + +void Convert15To24Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue) + __attribute__((noinline)); + +void Convert16To24Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue) + __attribute__((noinline)); + +void Convert15To32Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue) + __attribute__((noinline)); + +void Convert16To32Frame(const void* data, unsigned char* dest, unsigned npixels, bool swap_red_blue defaulttrue) + __attribute__((noinline)); + +void Convert24To16Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); +void Convert24ToR16Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); + +void Convert24To15Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); + +void Convert_I420To24Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width, bool swap_red_blue defaulttrue) + __attribute__((noinline)); + +void Convert15To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); +void Convert16To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); +void Convert24To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); +void Convert32To_I420Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); + +void Convert_YUY2To24Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width, bool swap_red_blue defaulttrue) + __attribute__((noinline)); + +void Convert15To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); +void Convert16To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); +void Convert24To_YUY2Frame(const void* data, unsigned 
char* dest, unsigned npixels, unsigned width); +void Convert32To_YUY2Frame(const void* data, unsigned char* dest, unsigned npixels, unsigned width); + +#ifdef __cplusplus +} + #undef defaulttrue +#else + #undef defaulttrue + #undef bool +#endif diff -NaHudr dosbox-0.73/src/simd.hh dosbox-0.73-patched/src/simd.hh --- dosbox-0.73/src/simd.hh 1970-01-01 02:00:00.000000000 +0200 +++ dosbox-0.73-patched/src/simd.hh 2008-04-05 17:01:09.719886860 +0300 @@ -0,0 +1,365 @@ +#if defined(__MMX__) && !defined(__x86_64) +#define USE_MMX +#endif +#if defined(__SSE__) +#define USE_SSE +#endif +/* USE_MMX is defined only when __MMX__ is set and __x86_64 is not; the c64 typedef near the end of this file selects c64_MMX or c64_nonMMX based on it. */ +/* SIMD interface (MMX) written by Bisqwit + * Copyright (C) 1992,2008 Joel Yliluoma (http://iki.fi/bisqwit/) + */ +/* NOTE(review): the header names after the three #include directives below are missing from this copy of the patch; restore them from the upstream file before applying. */ +#ifdef __3dNOW__ +# include /* Note: not available on ICC */ +#elif defined(__MMX__) +# include +#endif +#ifdef __SSE__ +#include + #ifdef __ICC + typedef __m128 __v4sf; + #endif +#endif +/* c64_common: static saturating-clamp and bit-spreading helpers inherited by both 64-bit wrappers (c64_MMX and c64_nonMMX). */ +struct c64_common +{ + static signed char clamp_s8(int_fast64_t v) + { return v<-128 ? -128 : (v > 127 ? 127 : v); } + static unsigned char clamp_u8(int_fast64_t v) + { return v<0 ? 0 : (v > 255 ? 255 : v); } + static short clamp_s16(int_fast64_t v) + { return v<-32768 ? -32768 : (v > 32767 ? 
32767 : v); } +/* expand32_8 / expand32_16 spread the four bytes / two 16-bit halves of a 32-bit value into every other byte / 16-bit slot of a 64-bit result, leaving the slots in between zero. */ + static inline uint_fast64_t expand32_8(uint_fast32_t a) + { + // 0000abcd -> 0a0b0c0d + typedef uint_fast64_t v; + return (a&0xFFU) + | ((a&0xFF00U)<<8) // base: 8+8 = 16 + | ((v)(a&0xFF0000U)<<16) // base: 16+16 = 32 + | ((v)(a&0xFF000000UL)<<24); // base: 24+24 = 48 + } + static inline uint_fast64_t expand32_16(uint_fast32_t a) + { + // 0000abcd -> 00ab00cd + typedef uint_fast64_t v; + return (a&0xFFFFU) + | ((v)(a&0xFFFF0000UL)<<16); // base: 16+16 = 32 + } +}; + +#ifdef __MMX__ +/* 64-bit integers that use MMX / 3Dnow operations where relevant */ +struct c64_MMX: public c64_common +{ + typedef c64_MMX c64; + + __m64 value; + + inline c64_MMX() { } + inline c64_MMX(__m64 v) : value(v) { } + inline c64_MMX(const uint64_t& v) : value( *(const __m64*)& v) { } + inline c64_MMX(int v) : value(_m_from_int(v)) { } + inline c64_MMX(short a,short b,short c, short d) + : value(_mm_setr_pi16(a,b,c,d)) { } + + inline c64 operator<< (int b) const { if(b < 0) return *this >> -b; return shl64(b); } + inline c64 operator>> (int b) const { if(b < 0) return *this << -b; return shr64(b); } + c64& operator<<= (int n) { return *this = shl64(n); } + c64& operator>>= (int n) { return *this = shr64(n); } + + c64 conv_s16_u8() const { return conv_s16_u8(*this); } + c64 conv_s16_s8() const { return conv_s16_s8(*this); } + + void Get(const unsigned char* p) { value = *(const __m64*)p; } + void Put( unsigned char* p)const { *(__m64*)p = value; } + + void Init16(short a,short b,short c, short d) + { value = _mm_setr_pi16(a,b,c,d); } + void Init16(short a) + { value = _mm_set1_pi16(a); } +/* GetD currently performs the same full 64-bit load as Get. */ + void GetD(const unsigned char* p) { value = *(const __m64*)p; } +/* NOTE(review): the parameter lists after the two 'template' keywords below are missing from this copy of the patch; both bodies index the stored value with n. */ + template + short Extract16() const { return ((const short*)&value)[n]; } + template + int Extract32() const { return ((const int*)&value)[n]; } + + short Extract88_from_1616lo() const + { + const unsigned char* data = (const unsigned char*)&value; + // bytes: 76543210 + // shorts: 33221100 + // take: H L + return 
data[0] | *(short*)(data+1); + //return data[0] | ((*(const unsigned int*)data) >> 8); + } + short Extract88_from_1616hi() const + { + const unsigned char* data = 4+(const unsigned char*)&value; + // bytes: 76543210 + // shorts: 33221100 + // take: H L + return data[0] | *(short*)(data+1); + //return data[0] | ((*(const unsigned int*)data) >> 8); + } +/* NOTE(review): both Extract88_* helpers read a short through a cast pointer at data+1 (bytes 1..2 of the chosen half, an odd address) and OR it with byte 0, so on a little-endian target byte 1 also lands in the low byte of the result; that matches the "take: H L" diagrams only when byte 1 is zero - confirm that callers guarantee this. */ + + c64& operator&= (const c64& b) { value=_mm_and_si64(value,b.value); return *this; } + c64& operator|= (const c64& b) { value=_mm_or_si64(value,b.value); return *this; } + c64& operator^= (const c64& b) { value=_mm_xor_si64(value,b.value); return *this; } + c64& operator+= (const c64& b) { return *this = *this + b; } + c64& operator-= (const c64& b) { return *this = *this - b; } + + c64 operator~ () const { + static const uint_least64_t negpat = ~(uint_least64_t)0; + return c64(_mm_xor_si64(value, *(const __m64*)&negpat)); + } + + /* psllqi: p = packed + s = shift + r = right, l = left + l = shift in zero, a = shift in sign bit + q = 64-bit, d = 32-bit, w = 16-bit + [i = immed amount] + */ + c64 operator& (const c64& b) const { return c64(_mm_and_si64(value,b.value)); } + c64 operator| (const c64& b) const { return c64(_mm_or_si64(value,b.value)); } + c64 operator^ (const c64& b) const { return c64(_mm_xor_si64(value,b.value)); } +/* 64-bit subtract/add: use the _mm_sub_si64/_mm_add_si64 intrinsics when __SSE2__ is defined, otherwise reinterpret both operands as uint64_t and use plain integer arithmetic. */ + c64 operator- (const c64& b) const + { + #ifdef __SSE2__ + return _mm_sub_si64(value, b.value); + #else + return (const uint64_t&)value - (const uint64_t&)b.value; + #endif + } + c64 operator+ (const c64& b) const + { + #ifdef __SSE2__ + return _mm_add_si64(value, b.value); + #else + return (const uint64_t&)value + (const uint64_t&)b.value; + #endif + } + + + c64 shl64(int b) const { return _mm_slli_si64(value, b); } + c64 shr64(int b) const { return _mm_srli_si64(value, b); } + c64 shl16(int b) const { return _mm_slli_pi16(value, b); } + c64 shr16(int b) const { return _mm_srli_pi16(value, b); } + c64 sar32(int b) const { return _mm_srai_pi32(value, b); } + c64 sar16(int b) const { 
return _mm_srai_pi16(value, b); } + c64 add32(const c64& b) const { return _mm_add_pi32(value, b.value); } + c64 add16(const c64& b) const { return _mm_add_pi16(value, b.value); } + c64 sub32(const c64& b) const { return _mm_sub_pi32(value, b.value); } + c64 sub16(const c64& b) const { return _mm_sub_pi16(value, b.value); } + c64 mul16(const c64& b) const { return _mm_mullo_pi16(value, b.value); } + c64 mul16hi(const c64& b) const { return _mm_mulhi_pi16(value, b.value); } + //c64 mul32(const c64& b) const { return _mm_mullo_pi32(value, b.value); } + c64 add8(const c64& b) const { return _mm_add_pi8(value, b.value); } + c64 sub8(const c64& b) const { return _mm_sub_pi8(value, b.value); } +/* The two-operand unpack* wrappers pass b first and *this second to the intrinsic, i.e. the operand order is swapped relative to the call site. */ + c64 unpacklbw(const c64& b) const { return _mm_unpacklo_pi8(b.value,value); } + c64 unpacklwd(const c64& b) const { return _mm_unpacklo_pi16(b.value,value); } + c64 unpackhbw(const c64& b) const { return _mm_unpackhi_pi8(b.value,value); } + c64 unpackhwd(const c64& b) const { return _mm_unpackhi_pi16(b.value,value); } + c64 unpackldq(const c64& b) const { return _mm_unpacklo_pi32(b.value,value); } + c64 unpackldq() const { return _mm_unpacklo_pi32(value,value); } + + c64 operator& (const uint64_t& v) { return c64(_mm_and_si64(value, *(const __m64*)& v)); } + + c64 conv_s32_s16(const c64& b) const { return _mm_packs_pi32(value, b.value); } + c64 conv_s16_u8(const c64& b) const { return _mm_packs_pu16(value, b.value); } + c64 conv_s16_s8(const c64& b) const { return _mm_packs_pi16(value, b.value); } +}; +#endif +/* c64_nonMMX: plain-integer counterpart of c64_MMX that keeps the 64 bits in a uint_least64_t; it becomes the c64 typedef when USE_MMX is not defined. */ +struct c64_nonMMX: public c64_common +{ + typedef c64_nonMMX c64; + + uint_least64_t value; +/* NOTE(review): the int constructor converts v to the unsigned 64-bit member, so a negative v also sets the upper 32 bits, whereas c64_MMX(int) goes through _m_from_int - check that callers only pass non-negative values or that the difference is intended. */ + inline c64_nonMMX() { } + inline c64_nonMMX(uint64_t v) : value(v) { } + inline c64_nonMMX(int v) : value(v) { } + inline c64_nonMMX(short a,short b,short c, short d) + { Init16(a,b,c,d); } + + c64 operator<< (int b) const { if(b < 0) return *this >> -b; return shl64(b); } + c64 operator>> (int b) const { if(b < 0) return *this << -b; return shr64(b); } + c64& operator<<= 
(int n) { return *this = shl64(n); } + c64& operator>>= (int n) { return *this = shr64(n); } + + c64 conv_s16_u8() const { return conv_s16_u8(*this); } + c64 conv_s16_s8() const { return conv_s16_s8(*this); } + + void Init16(short a,short b,short c, short d) + { uint_fast64_t aa = (unsigned short)a, + bb = (unsigned short)b, + cc = (unsigned short)c, + dd = (unsigned short)d; + value = aa | (bb << 16) | (cc << 32) | (dd << 48); } + void Init16(short a) + { Init16(a,a,a,a); } + /* NOTE(review): a..d are promoted to int before shifting, so with a 32-bit int and d >= 128 the value of (d << 24) does not fit in a non-negative int and the cast to uint_fast64_t may set bits 32..63; casting each byte before shifting (as is done for e..h) would avoid that - confirm. */ void Init8(unsigned char a,unsigned char b,unsigned char c,unsigned char d, + unsigned char e,unsigned char f,unsigned char g,unsigned char h) + { + value = ((uint_fast64_t)(a | (b << 8) | (c << 16) | (d << 24))) + | (((uint_fast64_t)e) << 32) + | (((uint_fast64_t)f) << 40) + | (((uint_fast64_t)g) << 48) + | (((uint_fast64_t)h) << 56); + } + + void Get(const unsigned char* p) { value = *(const uint_least64_t*)p; } + void Put( unsigned char* p)const { *(uint_least64_t*)p = value; } + + c64& operator&= (const c64& b) { value&=b.value; return *this; } + c64& operator|= (const c64& b) { value|=b.value; return *this; } + c64& operator^= (const c64& b) { value^=b.value; return *this; } + c64& operator+= (const c64& b) { value+=b.value; return *this; } + c64& operator-= (const c64& b) { value-=b.value; return *this; } + c64 operator& (const c64& b) const { return value & b.value; } + c64 operator| (const c64& b) const { return value | b.value; } + c64 operator^ (const c64& b) const { return value ^ b.value; } + c64 operator- (const c64& b) const { return value - b.value; } + c64 operator+ (const c64& b) const { return value + b.value; } + + c64 operator& (uint_fast64_t b) const { return value & b; } + + c64 operator~ () const { return ~value; } +/* NOTE(review): part of the text below is missing in this copy of the patch (the rest of usimdsim, apparently the simdsim definition, and the start of the 64-bit shifts). Also note that shl16/shr16/sar16 pass a lane count of 2 with 16-bit types while the 16-bit add/sub/mul helpers pass 4 - check whether all four lanes were meant to be shifted. */ + #define usimdsim(type, count, op) \ + type* p = (type*)&res.value; \ + for(int n=0; n> b; } + c64 shl16(int b) const { c64 res = *this; usimdsim(short, 2, <<); return res; } + c64 shr16(int b) const { c64 res = *this; usimdsim(unsigned short, 2, >>); 
return res; } + c64 sar32(int b) const { c64 res = *this; usimdsim(int, 2, >>); return res; } + c64 sar16(int b) const { c64 res = *this; usimdsim(short, 2, >>); return res; } + + c64 add16(const c64& b) const { c64 res = *this; simdsim(short, 4, +); return res; } + c64 sub16(const c64& b) const { c64 res = *this; simdsim(short, 4, -); return res; } + c64 add32(const c64& b) const { c64 res = *this; simdsim(int, 2, +); return res; } + c64 sub32(const c64& b) const { c64 res = *this; simdsim(int, 2, -); return res; } + c64 mul16(const c64& b) const { c64 res = *this; simdsim(short, 4, *); return res; } + c64 mul16hi(const c64& b) const { c64 res = *this; simdsim(short, 4, *) >> 16; return res; } + c64 add8(const c64& b) const { c64 res = *this; simdsim(unsigned char, 8, +); return res; } + c64 sub8(const c64& b) const { c64 res = *this; simdsim(unsigned char, 8, -); return res; } + + #undef simdsim + #undef usimdsim +/* NOTE(review): the conv_* fallbacks below mask each lane out of the unsigned 64-bit value and pass it to clamp_* as a non-negative number, so a lane with its sign bit set saturates to the upper bound instead of being treated as negative. If they are meant to mirror the signed-saturating pack intrinsics used by c64_MMX, the lanes need sign extension first - confirm. */ + c64 conv_s32_s16(const c64& b) const + { + c64 res; res. + Init16(clamp_s16(value & 0xFFFFFFFFU), + clamp_s16(value >> 32), + clamp_s16(b.value & 0xFFFFFFFFU), + clamp_s16(b.value >> 32)); + return res; + } + c64 conv_s16_u8(const c64& b) const + { + c64 res; res. + Init8(clamp_u8(value & 0xFFFF), + clamp_u8((value >> 16) & 0xFFFF), + clamp_u8((value >> 32) & 0xFFFF), + clamp_u8((value >> 48) & 0xFFFF), + clamp_u8(b.value & 0xFFFF), + clamp_u8((b.value >> 16) & 0xFFFF), + clamp_u8((b.value >> 32) & 0xFFFF), + clamp_u8((b.value >> 48) & 0xFFFF)); + return res; + } + c64 conv_s16_s8(const c64& b) const + { + c64 res; res. 
+ Init8(clamp_s8(value & 0xFFFF), + clamp_s8((value >> 16) & 0xFFFF), + clamp_s8((value >> 32) & 0xFFFF), + clamp_s8((value >> 48) & 0xFFFF), + clamp_s8(b.value & 0xFFFF), + clamp_s8((b.value >> 16) & 0xFFFF), + clamp_s8((b.value >> 32) & 0xFFFF), + clamp_s8((b.value >> 48) & 0xFFFF)); + return res; + } + + /* TODO: Verify that these are correct (though they should never be used anyway). NOTE(review): the intrinsic branches pass p as the first operand and *this as the second, while the #else branches put the elements of *this into the low slot of each pair and those of p into the high slot, so the two branches may not interleave in the same order; the #else branch of unpackldq also ORs in value without masking its upper 32 bits. */ + c64 unpacklbw(const c64& p) const + { + #if defined(__MMX__) && !defined(__ICC) + /* ICC says [error: type of cast must be integral or enum] + * on the return value cast, + * so we cannot use this code on ICC. Fine for GCC. */ + return (uint_least64_t)_m_punpcklbw(*(__m64*)&p.value, *(__m64*)&value); + #else + uint_fast64_t a=value, b=p.value; + return expand32_8(a) | (expand32_8(b) << 8); + #endif + } + c64 unpackhbw(const c64& p) const + { + #if defined(__MMX__) && !defined(__ICC) + return (uint_least64_t)_m_punpckhbw(*(__m64*)&p.value, *(__m64*)&value); + #else + uint_fast64_t a=value, b=p.value; + return expand32_8(a>>32) | (expand32_8(b>>32) << 8); + #endif + } + c64 unpacklwd(const c64& p) const + { + #if defined(__MMX__) && !defined(__ICC) + return (uint_least64_t)_m_punpcklwd(*(__m64*)&p.value, *(__m64*)&value); + #else + uint_fast64_t a=value, b=p.value; + return expand32_16(a) | (expand32_16(b) << 16); + #endif + } + c64 unpackhwd(const c64& p) const + { + #if defined(__MMX__) && !defined(__ICC) + return (uint_least64_t)_m_punpckhwd(*(__m64*)&p.value, *(__m64*)&value); + #else + uint_fast64_t a=value, b=p.value; + return expand32_16(a>>32) | (expand32_16(b>>32) << 16); + #endif + } + c64 unpackldq() const { return unpackldq(*this); } + c64 unpackldq(const c64& p) const + { + #if defined(__MMX__) && !defined(__ICC) + return (uint_least64_t)_m_punpckldq(*(__m64*)&p.value, *(__m64*)&value); + #else + return value | (p.value << 32); + #endif + } +}; +/* c64 names the implementation chosen at build time: c64_MMX when USE_MMX is defined, c64_nonMMX otherwise. */ +#ifdef USE_MMX +typedef c64_MMX c64; +#else +typedef c64_nonMMX c64; +#endif +/* MMX_clear: called by the converters after their MMX sections; it issues _m_femms() when __3dNOW__ is defined, _mm_empty() when only __MMX__ is defined, and does nothing otherwise. */ +static inline void MMX_clear() +{ + 
/* Body of MMX_clear: _m_femms() is used when __3dNOW__ is defined, otherwise _mm_empty() when __MMX__ is defined; with neither macro defined the body is empty. */ #ifdef __3dNOW__ + _m_femms(); /* Note: not available on ICC or Valgrind */ + //_mm_empty(); + #elif defined(__MMX__) + _mm_empty(); + #endif +}