Memory Management

地址空间

Linux 使用 mm_struct (memory descriptor) 来描述进程的虚拟地址空间。可以使用 pmap 工具或从 /proc/[PID]/maps 中查看进程地址空间中各 area 的相关信息。

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Global pointer to a string literal; its target address is printed below. */
char *message = "hello world";
/* Global integer; only its address is of interest. */
int count = 0;

/*
 * Print the addresses of objects of different kinds (a function, the
 * environment and argument vectors, a string literal, a global, a local and
 * two malloc'ed buffers), then sleep for 60 seconds so that the process can
 * be inspected with pmap or /proc/[PID]/maps while it is still alive.
 *
 * envp is the widely supported third parameter of main that carries the
 * environment; it has to be declared here because the body prints it.
 */
int main(int argc, char *argv[], char *envp[]) {
  printf("pid: %d\n", getpid());

  int i = 0;
  void *ptr1 = malloc(1024);
  void *ptr2 = malloc(1024);

  /* %p expects a void *, so every pointer is cast explicitly. */
  printf("main: %p\n", (void *) main);
  printf("envp: %p\n", (void *) envp);
  /* The environment may be empty, in which case envp[0] is NULL. */
  if (envp != NULL && envp[0] != NULL)
    printf("\t[%s]: %p\n", envp[0], (void *) envp[0]);

  printf("argv: %p\n", (void *) argv);
  for (int x = 0; x < argc; x++)
    printf("\t[%s]: %p\n", argv[x], (void *) argv[x]);

  printf("message: %p\n", (void *) message);
  printf("count: %p\n", (void *) &count);
  printf("i: %p\n", (void *) &i);
  printf("ptr1: %p, ptr2: %p\n", ptr1, ptr2);

  /* Keep the process around long enough to look at its mappings. */
  sleep(60);
  free(ptr1);
  free(ptr2);
  return 0;
}

更进一步,可以通过 /proc/[PID]/mem 来修改一个正在运行进程的地址空间中的信息 (例如,让如下进程打印的 hello 字符串变成 bye)。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Print the PID once, then print a strdup'ed copy of "hello" together with
 * its address every two seconds, forever. The loop never exits, so the
 * process must be terminated externally.
 */
int main() {
  char *text;

  printf("pid: %d\n", getpid());
  text = strdup("hello");

  for (;;) {
    /* The same pointer is printed both as a string and as an address. */
    printf("%s: %p\n", text, text);
    sleep(2);
  }
  return 0;
}
'''
Locate and replace all occurrences of a string in the heap of a process
Usage: sudo python hack.py [PID] [search_string] [replace_by_string]
'''
import sys
import re


def find_heap_range(maps_lines):
  '''Return the (start, end) addresses of the [heap] mapping, or None.

  maps_lines is any iterable of text lines in /proc/[PID]/maps format.
  The address range is the first whitespace-separated field, written as
  two hexadecimal numbers joined by "-".
  '''
  for line in maps_lines:
    fields = line.split()
    # the mapping name, when there is one, is the last field of the line
    if not fields or fields[-1] != "[heap]":
      continue
    start, end = fields[0].split("-")
    return int(start, 16), int(end, 16)
  return None


def find_occurrences(heap, search_string):
  '''Return the offsets in the bytes object heap where search_string starts.

  search_string is encoded as ASCII and matched literally: it is escaped
  before being handed to re, so characters such as "." or "*" only match
  themselves. An empty search_string yields no matches.
  '''
  needle = bytes(search_string, "ASCII")
  if not needle:
    return []
  return [m.start() for m in re.finditer(re.escape(needle), heap)]


def main(argv):
  '''Patch the heap of the process named on the command line.

  argv has the layout of sys.argv: program name, PID, search string and
  replacement string. Returns 0 on success and 1 on a usage error or when
  the target process has no [heap] mapping.
  '''
  usage = "Usage: sudo python hack.py [PID] [search_string] [replace_by_string]"

  # parse args
  if len(argv) != 4:
    print(usage, file=sys.stderr)
    return 1
  try:
    pid = int(argv[1])
  except ValueError:
    print("[!] PID must be an integer: '{}'".format(argv[1]), file=sys.stderr)
    print(usage, file=sys.stderr)
    return 1
  search_string = str(argv[2])
  write_string = str(argv[3])
  if not search_string:
    print("[!] search_string must not be empty", file=sys.stderr)
    return 1
  maps_filename = "/proc/{}/maps".format(pid)
  mem_filename = "/proc/{}/mem".format(pid)

  # get start and end of the heap in the virtual memory
  with open(maps_filename, 'r') as maps_file:
    heap_range = find_heap_range(maps_file)
  if heap_range is None:
    print("[!] No [heap] mapping found in {}".format(maps_filename), file=sys.stderr)
    return 1
  addr_start, addr_end = heap_range
  print("[*] Heap Addr start [{:x}] | end [{:x}]".format(addr_start, addr_end))

  # the replacement is written with a terminating NUL byte
  payload = bytes(write_string, "ASCII") + b'\x00'
  if len(write_string) > len(search_string):
    # a longer replacement overwrites the bytes that follow the match
    print("[!] Warning: replacement is longer than the search string", file=sys.stderr)

  # open and read mem file
  with open(mem_filename, 'rb+') as mem_file:
    mem_file.seek(addr_start)
    heap = mem_file.read(addr_end - addr_start)

    # find all occurrences of the string; offsets are relative to addr_start
    loc = find_occurrences(heap, search_string)
    for each in loc:
      print("[*] Found '{}' at {:x}".format(search_string, addr_start + each))

    # write the new string
    for each in loc:
      mem_file.seek(addr_start + each)
      mem_file.write(payload)
      print("[*] Writing '{}' at {:x}".format(write_string, addr_start + each))

  return 0


if __name__ == "__main__":
  sys.exit(main(sys.argv))

代码共享

为了提高内存资源利用效率,在创建同一个应用程序的多个进程时,操作系统应该只在物理内存中存放一份只读的代码段,并由这些进程共享。

#include <stdio.h>
#include <unistd.h>

/*
 * Embed a large block of filler instructions in the program's code, print
 * the PID, and then spin forever so the process stays alive for inspection.
 */
int main() {
  /*
   * .fill repeat, size, value: emit (128 << 20) one-byte values of 0x90,
   * i.e. 128 MiB of the one-byte NOP instruction.
   */
  asm volatile(".fill (128 << 20), 1, 0x90");
  printf("pid = %d\n", getpid());
  /* Busy-wait; the process never exits on its own. */
  for (;;) {
  }
}

地址转换

Linux 使用分页机制来进行虚拟地址到物理地址的转换,其中 /proc/[PID]/pagemap 提供了用户程序访问 Page Table 的接口,通过其中的信息我们可以查询每个 VPN (Virtual Page Number) 对应的 PFN (Page Frame Number,物理页帧号) 并计算给定虚拟地址对应的物理地址。注意:自 Linux 4.2 起,没有 CAP_SYS_ADMIN 权限的进程读到的 PFN 字段为 0,因此下面的程序需要以 root 身份运行才能得到真实的物理地址。

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>

#define PAGEMAP_LENGTH 8
#define PAGE_SHIFT 12 

/*
 * Translate a virtual address of the calling process into a physical address
 * by looking up its page in /proc/<pid>/pagemap.
 *
 * The pagemap file holds one PAGEMAP_LENGTH-byte entry per virtual page, so
 * the entry for an address sits at (address / page size) * PAGEMAP_LENGTH.
 *
 * Returns the physical address, or 0 when the lookup fails (pagemap cannot
 * be opened or read) or the page is not present in RAM. Note that the kernel
 * reports the PFN as 0 to callers without sufficient privilege, in which case
 * the result is only the offset within the page.
 */
unsigned long get_physical_address(void *virtual_address) {
  // get the page size for the system
  long page_size = sysconf(_SC_PAGESIZE);
  if (page_size <= 0) {
    perror("sysconf(_SC_PAGESIZE)");
    return 0;
  }

  // open the pagemap file for the current process
  char pagemap_path[100];
  snprintf(pagemap_path, sizeof(pagemap_path), "/proc/%d/pagemap", (int) getpid());
  int fd = open(pagemap_path, O_RDONLY);
  if (fd < 0) {
    perror(pagemap_path);
    return 0;
  }

  // seek to the correct offset in the pagemap file
  uintptr_t address = (uintptr_t) virtual_address;
  off_t offset = (off_t) (address / (uintptr_t) page_size) * PAGEMAP_LENGTH;
  if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
    perror("lseek(pagemap)");
    close(fd);
    return 0;
  }

  // read the entry from the pagemap file
  uint64_t pagemap_entry = 0;
  ssize_t got = read(fd, &pagemap_entry, sizeof(pagemap_entry));
  if (got != (ssize_t) sizeof(pagemap_entry)) {
    if (got < 0)
      perror("read(pagemap)");
    else
      fprintf(stderr, "read(pagemap): short read\n");
    close(fd);
    return 0;
  }
  close(fd);

  // bit 63 is the "page present" flag; without it bits 0-54 are not a PFN
  if ((pagemap_entry & (1ULL << 63)) == 0)
    return 0;

  // extract the page frame number, which is in bits 0-54 of pagemap entry
  uint64_t pfn = pagemap_entry & ((1ULL << 55) - 1);

  // physical address = start of the frame + offset within the page
  return (unsigned long) (pfn * (uint64_t) page_size + address % (uintptr_t) page_size);
}

/*
 * Print the virtual address and the physical address reported by
 * get_physical_address for a function in this program (main), a malloc'ed
 * buffer, and printf, then sleep for 60 seconds so the process can be
 * inspected while it is still running.
 */
int main() {
  void *ptr = malloc(1024);
  if (ptr == NULL) {
    perror("malloc");
    return 1;
  }

  /*
   * %p and get_physical_address both take a void *, so function pointers are
   * converted explicitly instead of relying on an implicit conversion.
   */
  printf("main: VA %p -> PA 0x%lx \n", (void *) main, get_physical_address((void *) main));
  printf("ptr: VA %p -> PA 0x%lx \n", ptr, get_physical_address(ptr));
  printf("printf: VA %p -> PA 0x%lx \n", (void *) printf, get_physical_address((void *) printf));

  sleep(60);
  free(ptr);
  return 0;
}

如果你在编译 Kernel 的过程中没有启用 CONFIG_STRICT_DEVMEM (该选项用于限制用户态通过 /dev/mem 访问物理内存) 的话,你可以进一步通过 /dev/mem 来修改物理地址对应的值。