IDA变量分析错误

2023.9.15

使用IDA进行分析时,如果发现一些奇怪的代码,特别是奇怪的指针强转(例如引用char数组的元素时强转为_DWORD*),就要注意检查IDA中变量的类型是否有错误、多余或缺失.

例题1—BUUCTF-不一样的flag

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// IDA decompiler output, before any manual retyping of the locals.
// Menu-driven loop: reads a choice (1 up, 2 down, 3 left, 4 right), updates
// two position values, range-checks them, then inspects one byte of the
// string copied into v3. Every path out of the loop goes through exit().
int __cdecl __noreturn main(int argc, const char **argv, const char **envp)
{
// Typed as 29 bytes by IDA, yet bytes 25..28 are only ever accessed through
// a _DWORD* / int* cast below.
char v3[29]; // [esp+17h] [ebp-35h] BYREF
// Located at ebp-18h, i.e. exactly 29 bytes past the start of v3 (ebp-35h).
int v4; // [esp+34h] [ebp-18h]
int v5; // [esp+38h] [ebp-14h] BYREF
int i; // [esp+3Ch] [ebp-10h]
// Never accessed at a non-negative index; only used as a base address below.
_BYTE v7[12]; // [esp+40h] [ebp-Ch] BYREF

__main();
v4 = 0;
// 25 characters plus the terminating NUL, which lands in v3[25].
strcpy(v3, "*11110100001010000101111#");
while ( 1 )
{
puts("you can choose one action to execute");
puts("1 up");
puts("2 down");
puts("3 left");
printf("4 right\n:");
// v5 receives the menu choice.
scanf("%d", &v5);
if ( v5 == 2 )
{
// "down": increment the 32-bit value stored in bytes 25..28 of v3.
++*(_DWORD *)&v3[25];
}
else if ( v5 > 2 )
{
if ( v5 == 3 )
{
// "left"
--v4;
}
else
{
// Anything other than 4 at this point is an invalid choice.
if ( v5 != 4 )
LABEL_13:
exit(1);
// "right"
++v4;
}
}
else
{
// Choices below 2 that are not 1 are invalid as well.
if ( v5 != 1 )
goto LABEL_13;
// "up": decrement the same 32-bit value that "down" increments.
--*(_DWORD *)&v3[25];
}
// Two iterations: i == 0 reads the int at &v3[25], i == 1 reads the int at
// &v3[29], which is the address of v4. Both must stay within 0..4.
for ( i = 0; i <= 1; ++i )
{
if ( *(int *)&v3[4 * i + 25] < 0 || *(int *)&v3[4 * i + 25] > 4 )
exit(1);
}
// v7 is at ebp-0Ch and v3 at ebp-35h, 41 bytes apart, so v7[x - 41] is the
// same byte as v3[x]. The byte read is v3[5 * (int at v3[25]) + v4].
// 49 is the character '1'.
if ( v7[5 * *(_DWORD *)&v3[25] - 41 + v4] == 49 )
exit(1);
// 35 is the character '#'.
if ( v7[5 * *(_DWORD *)&v3[25] - 41 + v4] == 35 )
{
puts("\nok, the order you enter is the flag!");
exit(0);
}
}
}

注意--*(_DWORD *)&v3[25];这行代码
实际上存储地图(分析题意得知是一个走迷宫游戏)的只有5x5,也就是0-24的下标.
且v3[25]的地址被强转为int*,也就是说原本的v3[25-28]这4个字节其实是一个单独的变量,实际上就是当前所在行row,
所以v3的类型应该被改成char v3[25];而不是char v3[29];

注意if ( *(int *)&v3[4 * i + 25] < 0 || *(int *)&v3[4 * i + 25] > 4 )这行代码
首先,把v3改成char v3[25]后,IDA会为原来的v3[25-28]这4个字节新生成一个int变量v4,这行代码随之变成if ( *(&v4 + i) < 0 || *(&v4 + i) > 4 ),
再把这个新生成的v4重命名成row,把紧随其后的int变量(原来的v4)重命名成col,
更能看出问题—这个循环只有两次,实际上就是判断row和col两个变量是否 <0 或 >4
不过这里是代码分析,并不是变量分析错误的问题

注意if ( v7[5 * *(_DWORD *)&v3[25] - 41 + v4] == 35 )这行代码
有_BYTE v7[12];这个数组(修改v3的类型后被重新编号为v8),但代码只用了它的首地址再减去41来寻址,那么再根据这个函数的局部变量分布和5*5的地图这个信息,即:

1
2
3
4
5
6
7
8
9
// Stack layout of main after retyping v3 as char[25]; the function body is
// elided ("..."). Offsets are the ones IDA prints next to each local.
int main(...){
char v3[25]; // [esp+17h] [ebp-35h] BYREF // retyped from char[29]
int row; // [esp+30h] [ebp-1Ch] // newly created int variable
int col; // [esp+34h] [ebp-18h]
int v6; // [esp+38h] [ebp-14h] BYREF
int i; // [esp+3Ch] [ebp-10h]
_BYTE v8[12]; // [esp+40h] [ebp-Ch] BYREF
...
}

那么有4x4+25=41个字节的向前偏移,即v3的25个字节,加上row,col,v6,i这4个int变量的16个字节
所以代码可以修改为if ( v3[5 * row + col] == '#' ),实际上就是把v3当做5*5的二维数组来使用
这样整个代码就非常清晰了

例题2—BUUCTF-[ACTF新生赛2020]easyre

这个题一样,能看出来IDA分析的结果中常常有 int和char或其指针反复互相转换使用的代码

修改前IDA分析出来的结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// IDA decompiler output before manual retyping.
// Reads a string, checks its fixed prefix and one later byte, then checks 12
// bytes of it through a table lookup against the constant stored in v4.
int __cdecl main(int argc, const char **argv, const char **envp)
{
_BYTE v4[12]; // [esp+12h] [ebp-2Eh] BYREF
// Three dwords here, but read byte by byte through a char* cast in the loop.
_DWORD v5[3]; // [esp+1Eh] [ebp-22h]
// Typed as 5 bytes, yet it is the scanf("%s") destination. v7, v8, v9 and v10
// sit directly behind it at offsets 5, 9, 13 and 17 from its start.
_BYTE v6[5]; // [esp+2Ah] [ebp-16h] BYREF
int v7; // [esp+2Fh] [ebp-11h]
int v8; // [esp+33h] [ebp-Dh]
int v9; // [esp+37h] [ebp-9h]
char v10; // [esp+3Bh] [ebp-5h]
int i; // [esp+3Ch] [ebp-4h]

__main();
// The 12 expected bytes; qmemcpy copies exactly sizeof(v4) bytes.
qmemcpy(v4, "*F'\"N,\"(I?+@", sizeof(v4));
printf("Please input:");
// No field width is given, so the input is stored into v6 and whatever
// follows it on the stack.
scanf("%s", v6);
// 65 67 84 70 123 are 'A' 'C' 'T' 'F' '{'; 125 is '}'.
// v10 is the byte 17 positions after v6[0].
if ( v6[0] != 65 || v6[1] != 67 || v6[2] != 84 || v6[3] != 70 || v6[4] != 123 || v10 != 125 )
return 0;
// Copies the 12 bytes located at offsets 5..16 from v6 into v5.
v5[0] = v7;
v5[1] = v8;
v5[2] = v9;
for ( i = 0; i <= 11; ++i )
{
// Byte i of v5, minus one, indexes _data_start__; the entry must equal v4[i].
if ( v4[i] != _data_start__[*((char *)v5 + i) - 1] )
return 0;
}
printf("You are correct!");
return 0;
}

经过手动修改的结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// IDA pseudocode after manual retyping of the locals.
// Reads a string into v6, requires it to start with "ACTF{" and to have '}'
// at index 17, then checks the 12 bytes in between through a table lookup.
int __cdecl main(int argc, const char **argv, const char **envp)
{
_BYTE v4[12]; // [esp+12h] [ebp-2Eh] BYREF
char v5[12]; // [esp+1Eh] [ebp-22h]
// 18 bytes: spans from ebp-16h up to the byte just before i at ebp-4h.
_BYTE v6[18]; // [esp+2Ah] [ebp-16h] BYREF
int i; // [esp+3Ch] [ebp-4h]

__main();
// The 12 expected bytes; qmemcpy copies exactly sizeof(v4) bytes.
qmemcpy(v4, "*F'\"N,\"(I?+@", sizeof(v4));
printf("Please input:");
scanf("%s", v6);
// 65 67 84 70 123 are 'A' 'C' 'T' 'F' '{'; 125 is '}'.
if ( v6[0] != 65 || v6[1] != 67 || v6[2] != 84 || v6[3] != 70 || v6[4] != 123 || v6[17] != 125 )
return 0;
// Three 4-byte copies move v6[5..16] into v5[0..11].
*(_DWORD *)v5 = *(_DWORD *)&v6[5];
*(_DWORD *)&v5[4] = *(_DWORD *)&v6[9];
*(_DWORD *)&v5[8] = *(_DWORD *)&v6[13];
for ( i = 0; i <= 11; ++i )
{
// The input byte, minus one, indexes _data_start__; the entry must equal v4[i].
if ( v4[i] != _data_start__[v5[i] - 1] )
return 0;
}
printf("You are correct!");
return 0;
}

解密脚本:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <stdio.h>
#include <string.h>
/*
 * Solver for the easyre check: rebuilds the 12 input bytes that sit between
 * "ACTF{" and "}" and prints the complete flag to stdout.
 *
 * The checked program accepts an input byte c when
 * v4[i] == _data_start__[c - 1], so for every expected byte we search the
 * table for its position; the input byte is that position plus one.
 *
 * Returns 0 on success, 1 if some expected byte does not occur in the table.
 */
int main(void) {
    enum { PREFIX_LEN = 5, BODY_LEN = 12 };

    /* Bytes the program compares the table lookups against. */
    const char v4[BODY_LEN + 1] = "*F'\"N,\"(I?+@";

    /* Lookup table as dumped from the binary, including its trailing NUL. */
    const char _data_start__[] = {
        0x7e, 0x7D, 0x7C, 0x7B, 0x7A, 0x79, 0x78, 0x77, 0x76, 0x75,
        0x74,
        0x73, 0x72, 0x71, 0x70, 0x6F, 0x6E, 0x6D, 0x6C, 0x6B, 0x6A,
        0x69, 0x68, 0x67, 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60,
        0x5F, 0x5E, 0x5D, 0x5C, 0x5B, 0x5A, 0x59, 0x58, 0x57, 0x56,
        0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4F, 0x4E, 0x4D, 0x4C,
        0x4B, 0x4A, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42,
        0x41, 0x40, 0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38,
        0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, 0x2F, 0x2E,
        0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24,
        0x23, 0x20, 0x21, 0x22, 0x00
    };
    const int table_len = (int)(sizeof _data_start__ / sizeof _data_start__[0]);

    /* Zero-filled so the assembled flag is always NUL-terminated. */
    char v6[100] = {0};
    v6[0] = 'A';
    v6[1] = 'C';
    v6[2] = 'T';
    v6[3] = 'F';
    v6[4] = '{';
    v6[PREFIX_LEN + BODY_LEN] = '}';

    for (int i = 0; i < BODY_LEN; ++i) {
        int found = 0;

        /* Start at 1: the table is indexed with c - 1, so c == 0 would read
         * one element before the start of the array. */
        for (int c = 1; c <= table_len; ++c) {
            if (v4[i] == _data_start__[c - 1]) {
                v6[PREFIX_LEN + i] = (char)c;
                found = 1;
                break;
            }
        }

        /* Without this check a miss would leave a NUL in the middle of the
         * flag and printf would silently print a truncated result. */
        if (!found) {
            fprintf(stderr, "no table entry matches v4[%d]\n", i);
            return 1;
        }
    }

    printf("%s", v6);
    return 0;
}