Another NDH quals another VM wait ...
Introduction
Last weekend I participated in the Nuit du Hack CTF Quals 2013 with my teammate delroth; you can find an excellent write-up about escaping a Python sandbox on his blog.
So I decided to post a writeup too about the last crackme challenge "Crackme #1".
This Reverse Engineering challenge was a virtual machine, so I decided to reverse the full vm, but today I figured out something ...
Do you remember last year's NDH prequals? No, no, I will not talk about the bmp challenge but about VMNDH-2k12: it was exactly the same VM (I'm very sad to have lost some hours reversing something I already knew). The only change was the opcode values, so this post will not deal with the VM internals; you have all the information on this website.
VM Opcode
You can find the vm dump at :
LOAD:000000000048F2E8 vm_dump db 7Fh ;
LOAD:000000000048F2E9 db 45h ; E
LOAD:000000000048F2EA db 4Ch ; L
LOAD:000000000048F2EB db 46h ; F
LOAD:000000000048F2EC db 72h ; r
LOAD:000000000048F2ED db 3
LOAD:000000000048F2EE db 0Ah
The real VM code starts at offset 0x06 (byte 0x0A) and the size of the VM code is 0x371 (881 bytes).
Do you want to marry me ?
I love IDA, and I like writing plugins, scripts and processors in Python (when I don't have to reverse IDA to understand how its API works :p), so after reversing almost the same VM, I decided to write my ndh2k13 processor. Here is the code:
from idaapi import *
class DecodingError(Exception):
    """Raised when a byte sequence does not decode to a valid VM instruction."""
class NDHProcessor(processor_t):
    """IDA processor module for the NDH 2k13 crackme virtual machine.

    The class describes the VM's registers and instruction set to IDA and
    implements the four callbacks IDA drives a processor module with:
    ``ana`` (decode), ``emu`` (cross-references), ``outop`` and ``out``
    (text rendering).
    """

    # NOTE(review): ids >= 0x8000 are presumably the range IDA reserves for
    # third-party processor modules -- confirm against the SDK.
    id = 0x8000 + 5855
    flag = PR_ADJSEGS | PRN_HEX
    cnbits = 8
    dnbits = 8
    psnames = ["ndh2k13"]
    plnames = ["ndh2k13 VM CPU"]
    segreg_size = 0
    instruc_start = 0

    assembler = {
        "flag": AS_NCHRE | ASH_HEXF4 | ASD_DECF1 | ASO_OCTF3 | ASB_BINF2
                | AS_NOTAB,
        "uflag": 0,
        "name": "NDH assembler",
        "origin": ".org",
        "end": ".end",
        "cmnt": ";",
        "ascsep": '"',
        "accsep": "'",
        "esccodes": "\"'",
        "a_ascii": ".ascii",
        "a_byte": ".byte",
        "a_word": ".word",
        "a_bss": "dfs %s",
        "a_seg": "seg",
        "a_curip": "PC",
        "a_public": "",
        "a_weak": "",
        "a_extrn": ".extern",
        "a_comdef": "",
        "a_align": ".align",
        "lbrace": "(",
        "rbrace": ")",
        "a_mod": "%",
        "a_band": "&",
        "a_bor": "|",
        "a_xor": "^",
        "a_bnot": "~",
        "a_shl": "<<",
        "a_shr": ">>",
        "a_sizeof_fmt": "size %s",
    }

    # R0..BP (indices 0-9) are the registers an instruction may encode, see
    # _read_reg(); CS and DS are only used as IDA's segment registers in
    # _init_registers().
    reg_names = regNames = [
        "R0", "R1", "R2", "R3", "R4",
        "R5", "R6", "R7", "SP", "BP",
        "CS", "DS"
    ]

    # The position of an entry in this list is its ``itype``.  The
    # CF_USE1/CF_USE2 bits are also what out() tests to decide how many
    # operands to print, so they must be set for every operand to display.
    instruc = instrs = [
        {'name': 'PUSH', 'feature': CF_USE1},
        {'name': 'PUSHB', 'feature': CF_USE1},
        {'name': 'PUSHW', 'feature': CF_USE1},
        {'name': 'NOP', 'feature': 0},
        {'name': 'POP', 'feature': CF_USE1},
        {'name': 'MOV', 'feature': CF_USE1 | CF_USE2},
        {'name': 'MOVB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'MOVW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'ADD', 'feature': CF_USE1 | CF_USE2},
        {'name': 'ADDB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'ADDW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'SUB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'SUBB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'SUBW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'MUL', 'feature': CF_USE1 | CF_USE2},
        {'name': 'MULB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'MULW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'DIV', 'feature': CF_USE1 | CF_USE2},
        {'name': 'DIVB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'DIVW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'INC', 'feature': CF_USE1},
        {'name': 'DEC', 'feature': CF_USE1},
        {'name': 'OR', 'feature': CF_USE1 | CF_USE2},
        {'name': 'ORB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'ORW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'AND', 'feature': CF_USE1 | CF_USE2},
        {'name': 'ANDB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'ANDW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'XOR', 'feature': CF_USE1 | CF_USE2},
        {'name': 'XORB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'XORW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'NOT', 'feature': CF_USE1},
        {'name': 'JZ', 'feature': CF_USE1},
        {'name': 'JNZ', 'feature': CF_USE1},
        {'name': 'JMPS', 'feature': CF_USE1 | CF_STOP},
        {'name': 'TEST', 'feature': CF_USE1 | CF_USE2},
        {'name': 'CMP', 'feature': CF_USE1 | CF_USE2},
        {'name': 'CMPB', 'feature': CF_USE1 | CF_USE2},
        {'name': 'CMPW', 'feature': CF_USE1 | CF_USE2},
        {'name': 'CALL', 'feature': CF_USE1 | CF_CALL},
        {'name': 'RET', 'feature': CF_STOP},
        {'name': 'JMPL', 'feature': CF_USE1 | CF_STOP},
        {'name': 'END', 'feature': CF_STOP},
        {'name': 'XCHG', 'feature': CF_USE1 | CF_USE2},
        {'name': 'JA', 'feature': CF_USE1},
        {'name': 'JB', 'feature': CF_USE1},
        {'name': 'SYSCALL', 'feature': 0},
    ]
    instruc_end = len(instruc)

    def __init__(self):
        """Initialise the base processor and build the name lookup tables."""
        processor_t.__init__(self)
        self._init_instructions()
        self._init_registers()

    def _init_instructions(self):
        """Build ``self.inames``: mnemonic -> index in ``instrs`` (the itype)."""
        self.inames = {}
        for idx, ins in enumerate(self.instrs):
            self.inames[ins['name']] = idx

    def _init_registers(self):
        """Build ``self.reg_ids`` and declare CS/DS as the segment registers."""
        self.reg_ids = {}
        for i, reg in enumerate(self.reg_names):
            self.reg_ids[reg] = i
        self.regFirstSreg = self.regCodeSreg = self.reg_ids["CS"]
        self.regLastSreg = self.regDataSreg = self.reg_ids["DS"]

    # ------------------------------------------------------------------
    # Byte-level readers.  Each one advances ``cmd.size`` so that the next
    # read continues right after the bytes already consumed.
    # ------------------------------------------------------------------

    def _read_cmd_byte(self):
        """Consume and return the next byte of the instruction being decoded."""
        ea = self.cmd.ea + self.cmd.size
        byte = get_full_byte(ea)
        self.cmd.size += 1
        return byte

    def _read_cmd_word(self):
        """Consume two bytes and return them as a little-endian 16-bit value."""
        low = self._read_cmd_byte()
        return low | (self._read_cmd_byte() << 8)

    def _read_reg(self):
        """Consume one byte and return it as a register index.

        Raises:
            DecodingError: if the byte is not in the range 0..9 (R0..BP).
        """
        r = self._read_cmd_byte()
        if r >= 0x0A:
            raise DecodingError()
        return r

    # ------------------------------------------------------------------
    # Operand fillers.
    # ------------------------------------------------------------------

    def _set_reg_operand(self, op, kind=o_reg):
        """Fill *op* as a word-sized register operand read from the stream.

        *kind* is ``o_reg`` for a plain register or ``o_phrase`` for a
        register used as a pointer (rendered as ``[Rn]`` by outop()).
        """
        op.type = kind
        op.dtyp = dt_word
        op.reg = self._read_reg()

    def _set_imm_operand(self, op, dtyp):
        """Fill *op* as an immediate of width *dtyp* (dt_byte or dt_word)."""
        op.type = o_imm
        op.dtyp = dtyp
        if dtyp == dt_byte:
            op.value = self._read_cmd_byte()
        else:
            op.value = self._read_cmd_word()

    # ------------------------------------------------------------------
    # Instruction-format decoders.
    # ------------------------------------------------------------------

    def _ana_ntypeinstr(self, name, valid):
        """Decode an instruction whose operand layout is given by a type byte.

        Args:
            name: base mnemonic; the "B"/"W" suffixed variants are selected
                from the type byte.
            valid: collection of type-byte values accepted for this opcode.

        Raises:
            DecodingError: if the type byte is not in *valid*, is not one of
                the layouts handled below, or encodes a bad register.
        """
        cmd = self.cmd
        optype = self._read_cmd_byte()
        if optype not in valid:
            raise DecodingError()

        # Every layout except 4, 5 and 6 starts with a destination register.
        if optype not in (4, 5, 6):
            self._set_reg_operand(cmd[0])

        if optype == 0x0:       # reg, reg
            cmd.itype = self.inames[name]
            self._set_reg_operand(cmd[1])
        elif optype == 0x1:     # reg, imm8
            cmd.itype = self.inames[name + "B"]
            self._set_imm_operand(cmd[1], dt_byte)
        elif optype == 0x2:     # reg, imm16
            cmd.itype = self.inames[name + "W"]
            self._set_imm_operand(cmd[1], dt_word)
        elif optype == 0x3:     # reg
            cmd.itype = self.inames[name]
        elif optype == 0x4:     # imm8
            cmd.itype = self.inames[name + "B"]
            self._set_imm_operand(cmd[0], dt_byte)
        elif optype == 0x5:     # imm16
            cmd.itype = self.inames[name + "W"]
            self._set_imm_operand(cmd[0], dt_word)
        elif optype == 0x6:     # [reg], reg
            cmd.itype = self.inames[name + "B"]
            self._set_reg_operand(cmd[0], o_phrase)
            self._set_reg_operand(cmd[1])
        elif optype == 0xA:     # reg, [reg]
            cmd.itype = self.inames[name]
            self._set_reg_operand(cmd[1], o_phrase)
        else:
            # Layouts 7, 8 and 9 are listed as valid for MOV but have no
            # decoder here, so they are rejected like any unknown layout.
            raise DecodingError()

    def _ana_one_r(self, name):
        """Decode a ``name Rn`` instruction (single register operand)."""
        self.cmd.itype = self.inames[name]
        self._set_reg_operand(self.cmd[0])

    def _ana_two_r(self, name):
        """Decode a ``name Rn, Rm`` instruction (two register operands)."""
        cmd = self.cmd
        cmd.itype = self.inames[name]
        self._set_reg_operand(cmd[0])
        self._set_reg_operand(cmd[1])

    def _ana_jmp(self, name, size=16):
        """Decode a relative branch and store its absolute target.

        Args:
            name: mnemonic of the branch instruction.
            size: width in bits of the signed displacement; 16 reads two
                bytes, any other value reads a single byte.
        """
        cmd = self.cmd
        cmd.itype = self.inames[name]
        if size == 16:
            addr = self._read_cmd_word()
            if addr & 0x8000:
                addr -= 0x10000
        else:
            addr = self._read_cmd_byte()
            if addr & 0x80:
                addr -= 0x100
        # The displacement is relative to the end of the instruction.
        addr += cmd.ea + cmd.size
        cmd[0].type = o_near
        cmd[0].dtyp = dt_word
        cmd[0].addr = addr

    def _ana(self):
        """Decode the instruction at ``cmd.ea`` and return its size in bytes.

        Raises:
            DecodingError: on an unknown opcode or a malformed operand.
        """
        cmd = self.cmd
        opcode = self._read_cmd_byte()
        if opcode == 0x1F:
            self._ana_ntypeinstr("PUSH", valid=(3, 4, 5))
        elif opcode == 0x0A:
            self._ana_jmp("JMPL")
        elif opcode == 0x1C:
            self._ana_ntypeinstr("MOV", valid=(0, 1, 2, 6, 7, 8, 9, 10))
        elif opcode == 0x0C:
            cmd.itype = self.inames["CALL"]
            flags = self._read_cmd_byte()
            if flags == 0x4:
                # Relative call: same 16-bit displacement encoding as jumps.
                self._ana_jmp("CALL")
            elif flags == 0x3:
                # Indirect call through a register.
                self._set_reg_operand(cmd[0])
            else:
                raise DecodingError()
        elif opcode == 0x30:
            cmd.itype = self.inames["SYSCALL"]
        elif opcode == 0x0B:
            cmd.itype = self.inames["RET"]
        elif opcode == 0x1A:
            self._ana_ntypeinstr("SUB", valid=(0, 1, 2))
        elif opcode == 0x0D:
            self._ana_ntypeinstr("CMP", valid=(0, 1, 2))
        elif opcode == 0x11:
            self._ana_jmp("JZ")
        elif opcode == 0x09:
            cmd.itype = self.inames["END"]
        elif opcode == 0x17:
            self._ana_one_r("INC")
        elif opcode == 0x10:
            self._ana_jmp("JNZ")
        elif opcode == 0x16:
            self._ana_one_r("DEC")
        elif opcode == 0x13:
            self._ana_ntypeinstr("XOR", valid=(0, 1, 2))
        elif opcode == 0x0E:
            self._ana_two_r("TEST")
        elif opcode == 0x1D:
            self._ana_one_r("POP")
        elif opcode == 0x07:
            self._ana_jmp("JA")
        elif opcode == 0x0F:
            self._ana_jmp("JMPS", size=8)
        elif opcode == 0x06:
            self._ana_jmp("JB")
        elif opcode == 0x1B:
            self._ana_ntypeinstr("ADD", valid=(0, 1, 2))
        elif opcode == 0x08:
            self._ana_two_r("XCHG")
        elif opcode == 0x19:
            self._ana_ntypeinstr("MUL", valid=(0, 1, 2))
        else:
            raise DecodingError()
        return cmd.size

    def ana(self):
        """Decode callback: return the instruction size, or 0 if undecodable."""
        try:
            return self._ana()
        except DecodingError:
            return 0

    def _emu_operand(self, op):
        """Create the data or code cross-reference implied by operand *op*."""
        if op.type == o_mem:
            ua_dodata2(0, op.addr, op.dtyp)
            ua_add_dref(0, op.addr, dr_R)
        elif op.type == o_near:
            # Branch targets of CALL are call xrefs, everything else jumps.
            if self.cmd.get_canon_feature() & CF_CALL:
                fl = fl_CN
            else:
                fl = fl_JN
            ua_add_cref(0, op.addr, fl)

    def emu(self):
        """Emulation callback: add operand xrefs and the fall-through flow."""
        cmd = self.cmd
        ft = cmd.get_canon_feature()
        if ft & CF_USE1:
            self._emu_operand(cmd[0])
        if ft & CF_USE2:
            self._emu_operand(cmd[1])
        if ft & CF_USE3:
            self._emu_operand(cmd[2])
        # Instructions flagged CF_STOP never continue to the next address.
        if not ft & CF_STOP:
            ua_add_cref(0, cmd.ea + cmd.size, fl_F)
        return True

    def outop(self, op):
        """Render operand *op*; return False for an unsupported operand type."""
        if op.type == o_reg:
            out_register(self.reg_names[op.reg])
        elif op.type == o_imm:
            OutValue(op, OOFW_IMM)
        elif op.type in [o_near, o_mem]:
            ok = out_name_expr(op, op.addr, BADADDR)
            if not ok:
                # No name for the target: print the raw address as an error
                # and queue the instruction in IDA's problem list.
                out_tagon(COLOR_ERROR)
                OutLong(op.addr, 16)
                out_tagoff(COLOR_ERROR)
                QueueMark(Q_noName, self.cmd.ea)
        elif op.type == o_phrase:
            out_symbol('[')
            out_register(self.reg_names[op.reg])
            out_symbol(']')
        else:
            return False
        return True

    def out(self):
        """Render the whole instruction line (mnemonic plus used operands)."""
        cmd = self.cmd
        ft = cmd.get_canon_feature()
        buf = init_output_buffer(1024)
        OutMnem(15)
        if ft & CF_USE1:
            out_one_operand(0)
        if ft & CF_USE2:
            OutChar(',')
            OutChar(' ')
            out_one_operand(1)
        if ft & CF_USE3:
            OutChar(',')
            OutChar(' ')
            out_one_operand(2)
        term_output_buffer()
        cvar.gl_comm = 1
        MakeLine(buf)
def PROCESSOR_ENTRY():
    """Return the processor module instance that IDA should use."""
    return NDHProcessor()
If you have trouble dumping vm_opcode, you can find the dump here.
Load the VM into IDA, choose "ndh2k13 VM CPU: ndh2k13", and when IDA asks you for the memory organisation, tell it:
- ROM START ADDRESS : 0x8000
- Loading address : 0x8000
Now you can start reversing :)
Solution
We have only 3 syscalls in this version of the VM:
- R0 = 0x01 : exit
- R0 = 0x03 : read
- R0 = 0x04 : write
The code of the VM starts here:
ROM:8317 MOVW R0, aPassword
ROM:831C CALL write_msg
ROM:8320 SUBB SP, $20
ROM:8324 MOV R2, SP
ROM:8328 MOVB R1, 0
ROM:832C MOVB R3, $1F
ROM:8330 MOVB R0, $3
ROM:8334 SYSCALL ; SYSCALL READ
ROM:8335 MOV R0, R2
ROM:8339 CALL check_password
And the pseudo-code of the function check_password is:
char *key = 0x8342;
if (strlen(buf_password) == 9) // trailing "\n" included
if ((buf_password[0] ^ key[0]) == 'S')
if ((buf_password[1] ^ key[1]) == '[')
if ((buf_password[2] ^ key[2]) == 'K')
if ((buf_password[3] ^ key[3]) == ')')
... etc ...
print("GOOD PASSWORD\n")
exit
print("BAD PASSWORD\n")
exit
And the good password is :
# XOR key bytes stored in the VM image (see the pseudo-code: key = 0x8342).
key = [0x12, 0x21, 0x02, 0x19, 0x25, 0x34, 0x29, 0x11]
# Values each (password[i] ^ key[i]) is compared against by check_password.
res = ['S', '[', 'K', ')', 'R', 'v', 'Z', 'I']

# XOR is its own inverse, so password[i] == res[i] ^ key[i].
password = "".join(chr(ord(r) ^ k) for r, k in zip(res, key))
print(password)
The flag was 'AzI0wBsX'.
Conclusion
I'm an idiot for reversing an entire VM without figuring out that it was the same as last year's, but during a CTF you want to be the fastest and don't (always) think about old challenges.
Another solution was to count the number of instructions executed by the VM. When you write a crackme, I think you should compute a hash or something like that, because consecutive if statements leak how many characters are correct through the instruction count, which is just lulz.
Enjoy :
#! /bin/sh
# Instruction-count side channel against ./simple.
#
# For every candidate character, write "<candidate>AAAAAAA" to the file
# "guess", run the crackme under gdb with that file as stdin and print how
# many times the VM's opcode-handler call was reached.  A candidate that
# produces a higher count than the others passed one more comparison.

# Candidate characters, separated by spaces so the loop can word-split them.
charset="a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"
charset="$charset _ - ! \$ % ^ & * + = 0 1 2 3 4 5 6 7 8 9 > < ,"

# Disable pathname expansion: $charset is expanded unquoted below and must
# only be word-split, otherwise "*" would be replaced by file names.
set -f

# gdb command file.  Each hit of the breakpoint prints `$N = "instr"`; gdb
# numbers these values sequentially, so the last N is the number of hits.
# The delimiter is quoted so the shell copies the text verbatim.
cat > gdbscript <<'EOF'
set height 0
define countinstrs
p "instr"
c
countinstrs
end
b *0x0000000000400C7F # call handler opcode
run < guess
countinstrs
countinstrs
countinstrs
countinstrs
countinstrs
countinstrs
countinstrs
countinstrs
countinstrs
EOF

for c in $charset; do
    guess="$c"
    # printf instead of `echo -n`, whose behaviour is unspecified for sh.
    printf '%s' "$guess" > guess
    echo "AAAAAAA" >> guess
    printf 'trying key %s... ' "$(cat guess)"
    # Keep only the last `$N = "instr"` line and strip it down to N.
    gdb ./simple < gdbscript 2>&1 \
        | grep '^\$.*= "instr"$' \
        | tail -1 \
        | cut -d ' ' -f 1 \
        | cut -c 2-
done
First letter :
trying key zAAAAAAA... 245
trying key AAAAAAAA... 252
trying key BAAAAAAA... 245
Edit the script to set the first letter to "A" and brute-force the second one:
trying key AyAAAAAA... 252
trying key AzAAAAAA... 259
trying key AAAAAAAA... 252
And do it for the 8 letters.