Memory Management

地址空间

Linux 使用 mm_struct (memory descriptor) 来描述进程的虚拟地址空间。可以使用 pmap 工具或从 /proc/[PID]/maps 中查看进程地址空间中各 area 的相关信息。

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Global pointer to a string literal; main() prints the pointer's value,
 * i.e. the address of the literal itself. */
char *message = "hello world";
/* Global integer initialized to 0; main() prints its address. */
int count = 0;

/*
 * Print the addresses of several kinds of objects (the main function, the
 * environment and argument vectors, a string literal, a global, a local and
 * two malloc'd buffers), then sleep for 60 seconds so the printed addresses
 * can be compared against /proc/[PID]/maps or pmap output while the process
 * is still alive.
 *
 * envp is the third argument of main: a NULL-terminated array of
 * "NAME=value" strings. It is a widely supported extension (not ISO C) that
 * the original code used without declaring it.
 *
 * Returns 0 after releasing both buffers.
 */
int main(int argc, char *argv[], char *envp[]) {
  printf("pid: %d\n", getpid());

  int i = 0;
  void *ptr1 = malloc(1024);
  void *ptr2 = malloc(1024);

  /* %p expects a void *, hence the explicit casts below. */
  printf("main: %p\n", (void *)main);
  printf("envp: %p\n", (void *)envp);
  /* The environment may be empty; passing NULL to %s is undefined. */
  if (envp[0] != NULL)
    printf("\t[%s]: %p\n", envp[0], (void *)envp[0]);

  printf("argv: %p\n", (void *)argv);
  for (int x = 0; x < argc; x++)
    printf("\t[%s]: %p\n", argv[x], (void *)argv[x]);

  printf("message: %p\n", (void *)message);
  printf("count: %p\n", (void *)&count);
  printf("i: %p\n", (void *)&i);
  printf("ptr1: %p, ptr2: %p\n", ptr1, ptr2);

  /* Keep the process alive long enough to inspect its mappings. */
  sleep(60);
  free(ptr1);
  free(ptr2);
  return 0;
}

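更进一步，可以通过 /proc/[PID]/mem 来修改一个正在运行进程的地址空间中的信息。例如，下面的 C 程序每隔 2 秒打印一次由 strdup 分配的 hello 字符串；其后的 Python 脚本先从 /proc/[PID]/maps 中找到 [heap] 区域，再通过 /proc/[PID]/mem 改写其中的字符串，使该进程打印的 hello 变成 bye。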

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Print this process's PID, then print a strdup()-allocated copy of "hello"
 * together with its address every two seconds, forever. The string is
 * re-read from memory on every iteration, so a change made to that memory
 * from outside the process shows up in the next line of output.
 *
 * Returns EXIT_FAILURE if the copy cannot be allocated; otherwise the loop
 * never ends and the process must be stopped externally.
 */
int main(void) {
  printf("pid: %d\n", getpid());

  char *s = strdup("hello");
  if (s == NULL) {
    /* strdup returns NULL when allocation fails; printing it would be UB. */
    perror("strdup");
    return EXIT_FAILURE;
  }

  while (1) {
    printf("%s: %p\n", s, (void *)s);
    sleep(2);
  }
  return 0; /* not reached */
}
'''
Locate and replace all occurrences of a string in the heap of a process
Usage: sudo python3 hack.py [PID] [search_string] [replace_by_string]
'''
import sys
import re

def find_heap_range(maps_lines):
  """Return (start, end) of the first [heap] mapping, or None if absent.

  maps_lines is an iterable of lines in the /proc/[PID]/maps format: the
  first whitespace-separated field is "start-end" in hexadecimal and the
  last field is the mapping name.
  """
  for line in maps_lines:
    fields = line.split()
    if not fields or fields[-1] != "[heap]":
      continue
    start_hex, end_hex = fields[0].split("-")
    return int(start_hex, 16), int(end_hex, 16)
  return None


def find_occurrences(haystack, needle):
  """Return the offsets of all non-overlapping matches of needle in haystack.

  Both arguments are bytes. needle is matched literally, never as a regular
  expression. An empty needle yields no matches: it would otherwise match at
  every offset.
  """
  if not needle:
    return []
  return [m.start() for m in re.finditer(re.escape(needle), haystack)]


def main(argv):
  """Replace every occurrence of a string in the heap of a running process.

  argv is [program, PID, search_string, replace_by_string]. Returns 0 on
  success and 1 on invalid arguments or when the process has no [heap]
  mapping. Errors from opening or accessing the /proc files propagate as
  OSError.
  """
  if len(argv) != 4:
    prog = argv[0] if argv else "hack.py"
    print("Usage: {} [PID] [search_string] [replace_by_string]".format(prog),
          file=sys.stderr)
    return 1

  search_string = argv[2]
  write_string = argv[3]
  try:
    pid = int(argv[1])
    needle = search_string.encode("ascii")
    # The trailing NUL terminates the replacement as a C string.
    payload = write_string.encode("ascii") + b"\x00"
  except ValueError as err:  # also covers UnicodeEncodeError
    print("[!] Invalid argument: {}".format(err), file=sys.stderr)
    return 1
  if not needle:
    print("[!] search_string must not be empty", file=sys.stderr)
    return 1
  if len(write_string) > len(search_string):
    # The write is still performed, but it runs past the matched bytes.
    print("[!] Replacement is longer than the search string; "
          "bytes following each match will be overwritten", file=sys.stderr)

  maps_filename = "/proc/{}/maps".format(pid)
  mem_filename = "/proc/{}/mem".format(pid)

  # Locate the heap in the target's virtual address space.
  with open(maps_filename, "r") as maps_file:
    heap_range = find_heap_range(maps_file)
  if heap_range is None:
    print("[!] No [heap] mapping found in {}".format(maps_filename),
          file=sys.stderr)
    return 1
  addr_start, addr_end = heap_range
  print("[*] Heap Addr start [{:x}] | end [{:x}]".format(addr_start, addr_end))

  # File offsets in /proc/[PID]/mem are virtual addresses of the target.
  with open(mem_filename, "rb+") as mem_file:
    mem_file.seek(addr_start)
    heap = mem_file.read(addr_end - addr_start)

    # Report every match by absolute address before modifying anything.
    offsets = find_occurrences(heap, needle)
    for offset in offsets:
      print("[*] Found '{}' at {:x}".format(search_string, addr_start + offset))

    for offset in offsets:
      mem_file.seek(addr_start + offset)
      mem_file.write(payload)
      print("[*] Writing '{}' at {:x}".format(write_string, addr_start + offset))
  return 0


if __name__ == "__main__":
  sys.exit(main(sys.argv))