R5900簡易デコンパイラを作ってみた
PS2にはR5900というCPUが搭載されている。PS2エミュレータはこのR5900の機械語をリコンパイルして実行したりするのだが、現在世界で最も有名なエミュレータの一つであるPCSX2のリコンパイラのソースの一部を読んだところ、機械語が物凄い勢いで機械語にリコンパイルされていた。時間がなくて深くは読んでいないのだが、ものすごく複雑だった。これをもう少し簡単に行うことができないかと考えてみた。
自分が考えたアイデアはR5900の機械語を一度C言語にデコンパイルし、それをx86/x64バイナリを吐くことができるコンパイラで再度コンパイルするという物。上手く行けばCPUの拡張命令セットも簡単に使えるようになるはず。ということで、R5900のでコンパイラを作って実験してみた。ただし、機械語を読むのは面倒なので、R5900のアセンブラからCを吐かせるようにした。
まず用意したテストコードがこちら。
#define SIZE (2048) #define MAX (1 << 20) static int GRAPH[SIZE][SIZE]; inline unsigned long xor128() { static unsigned long x=123456789,y=362436069,z=521288629,w=88675123; unsigned long t; t=(x^(x<<11));x=y;y=z;z=w; return( w=(w^(w>>19))^(t^(t>>8)) ); } main() { int i, j, k; for (i = 0; i < SIZE; ++i){ for (j = 0; j < SIZE; ++j){ GRAPH[i][j] = xor128() % MAX; } } for (k = 0; k < SIZE; ++k){ for (i = 0; i < SIZE; ++i){ for (j = 0; j < SIZE; ++j){ if (GRAPH[i][j] > GRAPH[i][k] + GRAPH[k][j]){ GRAPH[i][j] = GRAPH[i][k] + GRAPH[k][j]; } } } } }
枝のコストをランダムに決定したグラフの全点間の最短距離をWarshall-Floyd法を用いて調べるというコード。特に変わったところはない。これをPS2DEV.ORG配布されているtoolchainを用いてコンパイルしてアセンブラを吐かせた結果がこちら。
.file 1 "benchmark.c" .section .mdebug.eabi64 .previous .sdata .align 3 .type x.0,@object .size x.0,8 x.0: .dword 0x75bcd15 .align 3 .type y.1,@object .size y.1,8 y.1: .dword 0x159a55e5 .align 3 .type z.2,@object .size z.2,8 z.2: .dword 0x1f123bb5 .align 3 .type w.3,@object .size w.3,8 w.3: .dword 0x5491333 .text .align 2 .p2align 3,,7 .globl main .ent main main: .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 .mask 0x00000000,0 .fmask 0x00000000,0 lui $2,%hi(GRAPH) # high ld $8,x.0 ld $10,y.1 addiu $15,$2,%lo(GRAPH) # low ld $9,z.2 move $14,$0 ld $6,w.3 dli $13,0xfffff # 1048575 sll $2,$14,13 $L41: li $7,2047 # 0x7ff addu $5,$2,$15 .p2align 3 $L11: dsll $3,$8,11 addu $7,$7,-1 xor $3,$8,$3 move $8,$10 dsrl $2,$3,8 move $10,$9 xor $3,$3,$2 move $9,$6 dsrl $2,$6,19 move $11,$10 xor $2,$6,$2 xor $2,$2,$3 move $6,$2 and $2,$2,$13 dsll $2,$2,32 dsra $2,$2,32 sw $2,0($5) .set noreorder .set nomacro bgez $7,$L11 addu $5,$5,4 .set macro .set reorder addu $14,$14,1 slt $2,$14,2048 .set noreorder .set nomacro bne $2,$0,$L41 sll $2,$14,13 .set macro .set reorder lui $2,%hi(GRAPH) # high sd $6,w.3 sd $11,y.1 addiu $10,$2,%lo(GRAPH) # low sd $8,x.0 sd $9,z.2 move $9,$0 sll $2,$9,2 $L42: move $14,$0 addu $8,$2,$10 sll $11,$9,13 $L27: sll $2,$14,13 addu $6,$11,$10 addu $5,$2,$10 li $7,2047 # 0x7ff .p2align 3 $L26: lw $4,0($6) addu $7,$7,-1 lw $3,0($8) lw $2,0($5) addu $3,$3,$4 slt $2,$3,$2 .set noreorder .set nomacro beq $2,$0,$L23 addu $6,$6,4 .set macro .set reorder sw $3,0($5) $L23: .set noreorder .set nomacro bgez $7,$L26 addu $5,$5,4 .set macro .set reorder addu $14,$14,1 slt $2,$14,2048 .set noreorder .set nomacro bne $2,$0,$L27 addu $8,$8,8192 .set macro .set reorder addu $9,$9,1 slt $2,$9,2048 .set noreorder .set nomacro bnel $2,$0,$L42 sll $2,$9,2 .set macro .set reorder j $31 .end main $Lfe1: .size main,$Lfe1-main .align 2 .p2align 3,,7 .globl xor128 .ent xor128 xor128: .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 .mask 0x00000000,0 .fmask 0x00000000,0 ld $2,x.0 ld $5,w.3 dsll $3,$2,11 xor $2,$2,$3 dsrl $4,$5,19 dsrl $3,$2,8 xor $4,$5,$4 xor $2,$2,$3 ld $3,y.1 xor $4,$4,$2 ld $2,z.2 sd $3,x.0 sd $2,y.1 sd $5,z.2 sd $4,w.3 .set noreorder .set nomacro j $31 move $2,$4 .set macro .set reorder .end xor128 $Lfe2: .size xor128,$Lfe2-xor128 .section .bss GRAPH: .align 3 .space 16777216 .previous .ident "GCC: (GNU) 3.2.2"
普段目にしない記号ばかりなので面喰ってしまった。これをC言語にデコンパイルするプログラムのソースがこちら。
#include <cstdio> #include <string> #include <sstream> #include <map> #include <iostream> using namespace std; #define REGIST_COMMAND(x) lineProcessors[#x]=x typedef void (*processLine)(const string& arg0, const string& arg1, const string& arg2); static map<string, processLine> lineProcessors; static char slot[1024] = {0}; static char delaySlot[1024] = {0}; string replace(const string& s, char from, char to) { string result = s; for (string::iterator it = result.begin(); it != result.end(); ++it){ if (*it == from){ *it = to; } } return result; } string parseArgS64(const string& s) { char buffer[1024]; if (s[0] == '$'){ sprintf(buffer, "registers.S64[%s]", s.c_str() + 1); } else { sprintf(buffer, "%s", s.c_str()); } return buffer; } string parseArgU32(const string& s) { char buffer[1024]; if (s[0] == '$'){ sprintf(buffer, "registers.U32[%s][0]", s.c_str() + 1); } else { sprintf(buffer, "%s", s.c_str()); } return buffer; } void ld(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = *(u64*)(&RAM[%s]);\n", r0, replace(arg1, '.', '_').c_str()); } void move(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = registers.U64[%d];\n", r0, r1); } void dli(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = %sULL;\n", r0, arg1.c_str()); } void sll(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); sprintf(slot, "\tregisters.U32[%d][0] = registers.U32[%d][0] >> %s;\n", r0, r1, arg2.c_str()); } void li(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = %s;\n", r0, arg1.c_str()); } void addu(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); const int r2 = atoi(arg2.c_str() + 1); sprintf(slot, "\tregisters.U32[%d][0] = %s + %s;\n", r0, parseArgU32(arg1).c_str(), parseArgU32(arg2).c_str()); } void dsll(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); const int r2 = atoi(arg2.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = registers.U64[%d] << %s;\n", r0, r1, arg2.c_str()); } void xor_(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); const int r2 = atoi(arg2.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = registers.U64[%d] ^ registers.U64[%d];\n", r0, r1, r2); } void and_(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); const int r2 = atoi(arg2.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = registers.U64[%d] & registers.U64[%d];\n", r0, r1, r2); } void dsra(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); const int r2 = atoi(arg2.c_str() + 1); sprintf(slot, "\tregisters.S64[%d] = registers.S64[%d] >> %s;\n", r0, r1, arg2.c_str()); } void dsrl(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); const int r2 = atoi(arg2.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = registers.U64[%d] >> %s;\n", r0, r1, arg2.c_str()); } void sw(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); istringstream iss(arg1); int address; char c; int r1; iss >> address >> c >> c >> r1; sprintf(slot, "\t*(u32*)(&RAM[registers.U32[%d][0] + %d]) = registers.U32[%d][0];\n", r1, address, r0); } void bgez(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); sprintf(delaySlot, "%%s" "\tif (registers.S32[%d][0] >= 0) {\n" "\t\tgoto %s;\n" "\t}\n", r0, arg1.c_str()); } void slt(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); sprintf(slot, "\tregisters.U64[%d] = %s < %s ? 1 : 0;\n", r0, parseArgS64(arg1).c_str(), parseArgS64(arg2).c_str()); } void bne(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); sprintf(delaySlot, "%%s" "\tif (registers.U64[%d] != registers.U64[%d]) {\n" "\t\tgoto %s;\n" "\t}\n", r0, r1, arg2.c_str()); } void sd(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); sprintf(slot, "\t*(u64*)(&RAM[%s]) = registers.U64[%d];\n", replace(arg1, '.', '_').c_str(), r0); } void lw(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); istringstream iss(arg1); int address; char c; int r1; iss >> address >> c >> c >> r1; sprintf(slot, "\tregisters.S32[%d][0] = *(s32*)(&RAM[registers.U32[%d][0] + %d]);\n", r0, r1, address); } void beq(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); sprintf(delaySlot, "%%s" "\tif (registers.U64[%d] == registers.U64[%d]) {\n" "\t\tgoto %s;\n" "\t}\n", r0, r1, arg2.c_str()); } void bnel(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); sprintf(delaySlot, "\tif (registers.U64[%d] != registers.U64[%d]) {\n" "\t%%s" "\t\tgoto %s;\n" "\t}\n", r0, r1, arg2.c_str()); } void lui(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); istringstream iss(arg1); string val; char c; iss >> c >> c >> c >> c >> val; val.erase(val.size() - 1); sprintf(slot, "\tregisters.U32[%d][0] = (u32)%s & 0xffff0000;\n", r0, val.c_str()); } void addiu(const string& arg0, const string& arg1, const string& arg2) { const int r0 = atoi(arg0.c_str() + 1); const int r1 = atoi(arg1.c_str() + 1); istringstream iss(arg2); string val; char c; iss >> c >> c >> c >> c >> val; val.erase(val.size() - 1); sprintf(slot, "\tregisters.U64[%d] = registers.U64[%d] + (((u64)%s) & 0xffff);\n", r0, r1, val.c_str()); } int main() { REGIST_COMMAND(ld); REGIST_COMMAND(move); REGIST_COMMAND(dli); REGIST_COMMAND(sll); REGIST_COMMAND(li); REGIST_COMMAND(addu); REGIST_COMMAND(dsll); lineProcessors["xor"]=xor_; lineProcessors["and"]=and_; REGIST_COMMAND(dsra); REGIST_COMMAND(dsrl); REGIST_COMMAND(sw); REGIST_COMMAND(bgez); REGIST_COMMAND(slt); REGIST_COMMAND(bne); REGIST_COMMAND(sd); REGIST_COMMAND(lw); REGIST_COMMAND(beq); REGIST_COMMAND(bnel); REGIST_COMMAND(lui); REGIST_COMMAND(addiu); cout << "typedef unsigned char u8;" << endl; cout << "typedef signed char s8;" << endl; cout << "typedef unsigned short u16;" << endl; cout << "typedef signed short s16;" << endl; cout << "typedef unsigned long u32;" << endl; cout << "typedef signed long s32;" << endl; cout << "typedef unsigned long long u64;" << endl; cout << "typedef signed long long s64;" << endl; cout << "static u8 RAM[32 * 1024 * 1024];" << endl; cout << "static const int GRAPH = 0;" << endl; cout << "static const int x_0 = 16777216;" << endl; cout << "static const int y_1 = 16777216 + 8;" << endl; cout << "static const int z_2 = 16777216 + 8 + 8;" << endl; cout << "static const int w_3 = 16777216 + 8 + 8 + 8;" << endl; cout << "static union _registers {" << endl; cout << "\tu64 U64[32];" << endl; cout << "\ts64 S64[32];" << endl; cout << "\tu32 U32[32][2];" << endl; cout << "\ts32 S32[32][2];" << endl; cout << "\tu16 U16[32][4];" << endl; cout << "\ts16 S16[32][4];" << endl; cout << "\tu8 U8 [32][8];" << endl; cout << "\ts8 S8 [32][8];" << endl; cout << "} registers;" << endl; cout << endl; cout << "int main() {" << endl; cout << "\t*(u64*)(&RAM[x_0]) = 0x75bcd15;" << endl; cout << "\t*(u64*)(&RAM[y_1]) = 0x159a55e5;" << endl; cout << "\t*(u64*)(&RAM[z_2]) = 0x1f123bb5;" << endl; cout << "\t*(u64*)(&RAM[w_3]) = 0x5491333;" << endl; string line; while (getline(cin, line)){ for (string::iterator it = line.begin(); it != line.end(); ++it){ if (*it == ','){ *it = ' '; } } string command, arg0, arg1, arg2; istringstream iss(line); iss >> command >> arg0 >> arg1 >> arg2; cout << "//" << line << endl; if (command.empty() || command[0] == '.'){ continue; } if (command[command.size() - 1] == ':'){ cout << replace(command, '.', '_') << endl; continue; } if (command == "j"){ break; } if (lineProcessors.count(command)){ lineProcessors[command](arg0, arg1, arg2); } else { cout << "NOT FOUND : " << line << endl; } if (delaySlot[0] && slot[0]){ printf(delaySlot, slot); delaySlot[0] = '\0'; slot[0] = '\0'; } else if (slot[0]) { printf("%s", slot); slot[0] = '\0'; } } cout << "}" << endl; }
必要な命令しか組み込んでいないが、とりあえずは動く。これを使って先ほどのアセンブラをデコンパイルした結果がこちら。
typedef unsigned char u8; typedef signed char s8; typedef unsigned short u16; typedef signed short s16; typedef unsigned long u32; typedef signed long s32; typedef unsigned long long u64; typedef signed long long s64; static u8 RAM[32 * 1024 * 1024]; static const int GRAPH = 0; static const int x_0 = 16777216; static const int y_1 = 16777216 + 8; static const int z_2 = 16777216 + 8 + 8; static const int w_3 = 16777216 + 8 + 8 + 8; static union _registers { u64 U64[32]; s64 S64[32]; u32 U32[32][2]; s32 S32[32][2]; u16 U16[32][4]; s16 S16[32][4]; u8 U8 [32][8]; s8 S8 [32][8]; } registers; int main() { *(u64*)(&RAM[x_0]) = 0x75bcd15; *(u64*)(&RAM[y_1]) = 0x159a55e5; *(u64*)(&RAM[z_2]) = 0x1f123bb5; *(u64*)(&RAM[w_3]) = 0x5491333; // .file 1 "benchmark.c" // .section .mdebug.eabi64 // .previous // .sdata // .align 3 // .type x.0 @object // .size x.0 8 //x.0: x_0: // .dword 0x75bcd15 // .align 3 // .type y.1 @object // .size y.1 8 //y.1: y_1: // .dword 0x159a55e5 // .align 3 // .type z.2 @object // .size z.2 8 //z.2: z_2: // .dword 0x1f123bb5 // .align 3 // .type w.3 @object // .size w.3 8 //w.3: w_3: // .dword 0x5491333 // .text // .align 2 // .p2align 3 7 // .globl main // .ent main //main: main: // .frame $sp 0 $31 # vars= 0 regs= 0/0 args= 0 extra= 0 // .mask 0x00000000 0 // .fmask 0x00000000 0 // lui $2 %hi(GRAPH) # high registers.U32[2][0] = (u32)GRAPH & 0xffff0000; // ld $8 x.0 registers.U64[8] = *(u64*)(&RAM[x_0]); // ld $10 y.1 registers.U64[10] = *(u64*)(&RAM[y_1]); // addiu $15 $2 %lo(GRAPH) # low registers.U64[15] = registers.U64[2] + (((u64)GRAPH) & 0xffff); // ld $9 z.2 registers.U64[9] = *(u64*)(&RAM[z_2]); // move $14 $0 registers.U64[14] = registers.U64[0]; // ld $6 w.3 registers.U64[6] = *(u64*)(&RAM[w_3]); // dli $13 0xfffff # 1048575 registers.U64[13] = 0xfffffULL; // sll $2 $14 13 registers.U32[2][0] = registers.U32[14][0] >> 13; //$L41: $L41: // li $7 2047 # 0x7ff registers.U64[7] = 2047; // addu $5 $2 $15 registers.U32[5][0] = registers.U32[2][0] + registers.U32[15][0]; // .p2align 3 //$L11: $L11: // dsll $3 $8 11 registers.U64[3] = registers.U64[8] << 11; // addu $7 $7 -1 registers.U32[7][0] = registers.U32[7][0] + -1; // xor $3 $8 $3 registers.U64[3] = registers.U64[8] ^ registers.U64[3]; // move $8 $10 registers.U64[8] = registers.U64[10]; // dsrl $2 $3 8 registers.U64[2] = registers.U64[3] >> 8; // move $10 $9 registers.U64[10] = registers.U64[9]; // xor $3 $3 $2 registers.U64[3] = registers.U64[3] ^ registers.U64[2]; // move $9 $6 registers.U64[9] = registers.U64[6]; // dsrl $2 $6 19 registers.U64[2] = registers.U64[6] >> 19; // move $11 $10 registers.U64[11] = registers.U64[10]; // xor $2 $6 $2 registers.U64[2] = registers.U64[6] ^ registers.U64[2]; // xor $2 $2 $3 registers.U64[2] = registers.U64[2] ^ registers.U64[3]; // move $6 $2 registers.U64[6] = registers.U64[2]; // and $2 $2 $13 registers.U64[2] = registers.U64[2] & registers.U64[13]; // dsll $2 $2 32 registers.U64[2] = registers.U64[2] << 32; // dsra $2 $2 32 registers.S64[2] = registers.S64[2] >> 32; // sw $2 0($5) *(u32*)(&RAM[registers.U32[5][0] + 0]) = registers.U32[2][0]; // .set noreorder // .set nomacro // bgez $7 $L11 // addu $5 $5 4 registers.U32[5][0] = registers.U32[5][0] + 4; if (registers.S32[7][0] >= 0) { goto $L11; } // .set macro // .set reorder // // addu $14 $14 1 registers.U32[14][0] = registers.U32[14][0] + 1; // slt $2 $14 2048 registers.U64[2] = registers.S64[14] < 2048 ? 1 : 0; // .set noreorder // .set nomacro // bne $2 $0 $L41 // sll $2 $14 13 registers.U32[2][0] = registers.U32[14][0] >> 13; if (registers.U64[2] != registers.U64[0]) { goto $L41; } // .set macro // .set reorder // // lui $2 %hi(GRAPH) # high registers.U32[2][0] = (u32)GRAPH & 0xffff0000; // sd $6 w.3 *(u64*)(&RAM[w_3]) = registers.U64[6]; // sd $11 y.1 *(u64*)(&RAM[y_1]) = registers.U64[11]; // addiu $10 $2 %lo(GRAPH) # low registers.U64[10] = registers.U64[2] + (((u64)GRAPH) & 0xffff); // sd $8 x.0 *(u64*)(&RAM[x_0]) = registers.U64[8]; // sd $9 z.2 *(u64*)(&RAM[z_2]) = registers.U64[9]; // move $9 $0 registers.U64[9] = registers.U64[0]; // sll $2 $9 2 registers.U32[2][0] = registers.U32[9][0] >> 2; //$L42: $L42: // move $14 $0 registers.U64[14] = registers.U64[0]; // addu $8 $2 $10 registers.U32[8][0] = registers.U32[2][0] + registers.U32[10][0]; // sll $11 $9 13 registers.U32[11][0] = registers.U32[9][0] >> 13; //$L27: $L27: // sll $2 $14 13 registers.U32[2][0] = registers.U32[14][0] >> 13; // addu $6 $11 $10 registers.U32[6][0] = registers.U32[11][0] + registers.U32[10][0]; // addu $5 $2 $10 registers.U32[5][0] = registers.U32[2][0] + registers.U32[10][0]; // li $7 2047 # 0x7ff registers.U64[7] = 2047; // .p2align 3 //$L26: $L26: // lw $4 0($6) registers.S32[4][0] = *(s32*)(&RAM[registers.U32[6][0] + 0]); // addu $7 $7 -1 registers.U32[7][0] = registers.U32[7][0] + -1; // lw $3 0($8) registers.S32[3][0] = *(s32*)(&RAM[registers.U32[8][0] + 0]); // lw $2 0($5) registers.S32[2][0] = *(s32*)(&RAM[registers.U32[5][0] + 0]); // addu $3 $3 $4 registers.U32[3][0] = registers.U32[3][0] + registers.U32[4][0]; // slt $2 $3 $2 registers.U64[2] = registers.S64[3] < registers.S64[2] ? 1 : 0; // .set noreorder // .set nomacro // beq $2 $0 $L23 // addu $6 $6 4 registers.U32[6][0] = registers.U32[6][0] + 4; if (registers.U64[2] == registers.U64[0]) { goto $L23; } // .set macro // .set reorder // // sw $3 0($5) *(u32*)(&RAM[registers.U32[5][0] + 0]) = registers.U32[3][0]; //$L23: $L23: // .set noreorder // .set nomacro // bgez $7 $L26 // addu $5 $5 4 registers.U32[5][0] = registers.U32[5][0] + 4; if (registers.S32[7][0] >= 0) { goto $L26; } // .set macro // .set reorder // // addu $14 $14 1 registers.U32[14][0] = registers.U32[14][0] + 1; // slt $2 $14 2048 registers.U64[2] = registers.S64[14] < 2048 ? 1 : 0; // .set noreorder // .set nomacro // bne $2 $0 $L27 // addu $8 $8 8192 registers.U32[8][0] = registers.U32[8][0] + 8192; if (registers.U64[2] != registers.U64[0]) { goto $L27; } // .set macro // .set reorder // // addu $9 $9 1 registers.U32[9][0] = registers.U32[9][0] + 1; // slt $2 $9 2048 registers.U64[2] = registers.S64[9] < 2048 ? 1 : 0; // .set noreorder // .set nomacro // bnel $2 $0 $L42 // sll $2 $9 2 if (registers.U64[2] != registers.U64[0]) { registers.U32[2][0] = registers.U32[9][0] >> 2; goto $L42; } // .set macro // .set reorder // // j $31 }
値が決め打ちだったり、スタックポインタを無視していたりと、いろいろと問題のあるコードだが、一応動くことは確認できた。それぞれの動作速度はこちら。
$ gcc benchmark.c && time ./a.exe real 0m41.403s user 0m41.277s sys 0m0.030s hisayori@tigeragon ~/recompile/r5900 $ gcc benchmark.c -O1 && time ./a.exe real 0m18.735s user 0m18.642s sys 0m0.015s hisayori@tigeragon ~/recompile/r5900 $ gcc benchmark.c -O2 && time ./a.exe real 0m12.761s user 0m12.682s sys 0m0.000s $ /cygdrive/c/ps2dev/ee/bin/ee-gcc benchmark.c -O2 -S && g++ recompile.cpp -O2 && ./a < benchmark.s > benchmark.recompiled.c && gcc benchmark.recompiled.c -O2 && gcc benchmark.recompiled.c -O2 -S && time ./a.exe real 1m16.081s user 1m15.847s sys 0m0.031s
Visual Studio 2008も使ってみた。
PGO無し $ time ./icpc.exe real 0m34.398s user 0m0.015s sys 0m0.124s PGO有り real 0m30.794s user 0m0.000s sys 0m0.125s
ということで、Visual Studio 2008のPGOまで使えばネイティブの速度の2〜3倍程度の遅さで動くことがわかった。これってもしかして使えるんじゃね?
もうすこしPCSX2のソースを読んでリコンパイラの内容を把握してから、PCSX2のフォーラムにアイデア投稿をしてみようと思う。
PS2DEV.ORG http://ps2dev.org/
PCSX2 - PS2 Emulator for Windows and Linux http://www.pcsx2.net/