Memory Management

地址空间

Linux 使用 mm_struct (memory descriptor) 来描述进程的虚拟地址空间。可以使用 pmap 工具或从 /proc/[PID]/maps 中查看进程地址空间中各 area 的相关信息。

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

char *message = "hello world";  /* pointer to a string literal */
int count = 0;                  /* zero-initialized global */

/* The process environment vector; POSIX expects the program to declare it. */
extern char **environ;

/*
 * Print the addresses of objects living in different parts of the process
 * address space (code, environment, arguments, string literal, global,
 * stack variable, heap allocations), then sleep for 60 seconds so the
 * output can be compared with /proc/[PID]/maps or pmap.
 *
 * argc/argv: the usual command-line arguments; each one is printed together
 *            with its address.
 * Returns 0.
 */
int main(int argc, char *argv[]) {
  printf("pid: %d\n", (int) getpid());

  int i = 0;
  void *ptr1 = malloc(1024);
  void *ptr2 = malloc(1024);
  char **envp = environ;

  /* %p requires a void * argument, hence the explicit casts below. */
  printf("main: %p\n", (void *) main);
  printf("envp: %p\n", (void *) envp);
  /* The environment may be empty; never hand a NULL pointer to %s. */
  if (envp != NULL && envp[0] != NULL)
    printf("\t[%s]: %p\n", envp[0], (void *) envp[0]);

  printf("argv: %p\n", (void *) argv);
  for (int x = 0; x < argc; x++)
    printf("\t[%s]: %p\n", argv[x], (void *) argv[x]);

  printf("message: %p\n", (void *) message);
  printf("count: %p\n", (void *) &count);
  printf("i: %p\n", (void *) &i);
  printf("ptr1: %p, ptr2: %p\n", ptr1, ptr2);

  sleep(60);
  free(ptr1);
  free(ptr2);
  return 0;
}

更进一步，可以通过 /proc/[PID]/mem 来修改一个正在运行进程的地址空间中的信息 (例如，让如下进程打印的 hello 字符串变成 bye)。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Print the process ID once, then print a strdup()-allocated copy of
 * "hello" together with its address every two seconds, forever.
 */
int main() {
  char *text;

  printf("pid: %d\n", getpid());
  text = strdup("hello");

  for (;;) {
    printf("%s: %p\n", text, text);
    sleep(2);
  }
  return 0;
}

'''
Locate and replace all occurrences of a string in the heap of a process
Usage: sudo python hack.py [PID] [search_string] [replace_by_string]
'''
import sys
import re


def find_heap_range(maps_lines):
  """Return (start, end) of the first "[heap]" mapping, or None.

  maps_lines: iterable of text lines in /proc/[PID]/maps format, i.e.
              "start-end perms offset dev inode   pathname".
  The addresses are parsed as hexadecimal integers.
  """
  for line in maps_lines:
    fields = line.split()
    if not fields or fields[-1] != "[heap]":
      continue
    start, end = fields[0].split("-")
    return int(start, 16), int(end, 16)
  return None


def find_occurrences(heap, needle):
  """Return the offsets of every non-overlapping match of needle in heap.

  heap, needle: bytes objects. needle is matched literally: it is escaped
  before being handed to the regex engine, so characters such as "." or "("
  in the search string do not act as regex metacharacters.
  An empty needle yields no matches.
  """
  if not needle:
    return []
  return [m.start() for m in re.finditer(re.escape(needle), heap)]


def main(argv):
  """Entry point. argv is [prog, PID, search_string, replace_by_string].

  Returns 0 on success and 1 on a usage error or when the target process
  has no [heap] mapping.
  """
  if len(argv) != 4:
    print("Usage: {} [PID] [search_string] [replace_by_string]".format(argv[0]))
    return 1

  # parse args
  pid = int(argv[1])
  search_string = str(argv[2])
  write_string = str(argv[3])
  needle = bytes(search_string, "ASCII")
  # the trailing NUL terminates the C string in the target process
  payload = bytes(write_string, "ASCII") + b'\x00'
  maps_filename = "/proc/{}/maps".format(pid)
  mem_filename = "/proc/{}/mem".format(pid)

  # get start and end of the heap in the virtual memory
  with open(maps_filename, 'r') as maps_file:
    heap_range = find_heap_range(maps_file)
  if heap_range is None:
    print("[!] No [heap] mapping found for pid {}".format(pid))
    return 1
  addr_start, addr_end = heap_range
  print("[*] Heap Addr start [{:x}] | end [{:x}]".format(addr_start, addr_end))

  # open and read mem file
  with open(mem_filename, 'rb+') as mem_file:
    mem_file.seek(addr_start)
    heap = mem_file.read(addr_end - addr_start)

    # find all occurrences of the string (reported as absolute addresses)
    loc = find_occurrences(heap, needle)
    for each in loc:
      print("[*] Found '{}' at {:x}".format(search_string, addr_start + each))

    # write the new string
    for each in loc:
      mem_file.seek(addr_start + each)
      mem_file.write(payload)
      print("[*] Writing '{}' at {:x}".format(write_string, addr_start + each))

  return 0


if __name__ == "__main__":
  sys.exit(main(sys.argv))

代码共享

为了提高内存资源利用效率，在创建同一个应用程序的多个进程时，操作系统应该只在物理内存中存放一份只读的代码段，并让这些进程共享它。

#include <stdio.h>
#include <unistd.h>

/*
 * Pad this function's code with 128 MiB of filler bytes, print the process
 * ID, and then spin forever so the process keeps running.
 */
int main() {
  // .fill repeat, size, value: emit (128 << 20) one-byte values of 0x90
  // directly into the instruction stream at this point.
  // NOTE(review): 0x90 is the single-byte NOP opcode on x86/x86-64; this
  // assumes an x86 target and a GNU-style assembler.
  asm volatile(".fill (128 << 20), 1, 0x90");
  printf("pid = %d\n", getpid());
  // Busy-wait: the loop never terminates, so main never returns.
  while(1);
}

地址转换

Linux 使用分页机制来进行虚拟地址到物理地址的转换，其中 /proc/[PID]/pagemap 提供了用户程序访问 Page Table 的接口，通过其中的信息我们可以查询每个 VPN (Virtual Page Number) 对应的 PFN (Page Frame Number) 并计算给定虚拟地址对应的物理地址。

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>

#define PAGEMAP_LENGTH 8   /* size in bytes of one pagemap entry */
#define PAGE_SHIFT 12 

/*
 * Translate a virtual address of the calling process into a physical
 * address by looking up its entry in /proc/self/pagemap.
 *
 * virtual_address: any address; it does not have to be page-aligned.
 *
 * Returns the physical address, or 0 when it cannot be determined:
 *   - the page size cannot be queried,
 *   - the pagemap file cannot be opened, positioned or read,
 *   - the page is not present in RAM (bit 63 of the entry is clear), or
 *   - the entry's page frame number field is 0.
 *     NOTE(review): the kernel pagemap documentation states that the PFN
 *     is reported as 0 to callers without CAP_SYS_ADMIN - confirm for the
 *     target kernel version.
 */
unsigned long get_physical_address(void *virtual_address) {
  const uint64_t present_bit = 1ULL << 63;     /* "page present" flag */
  const uint64_t pfn_mask = (1ULL << 55) - 1;  /* PFN lives in bits 0-54 */

  /* Named page_size (not PAGE_SIZE) to avoid clashing with a macro of that
   * name that some C libraries define. */
  long page_size = sysconf(_SC_PAGESIZE);
  if (page_size <= 0)
    return 0;

  int fd = open("/proc/self/pagemap", O_RDONLY);
  if (fd < 0)
    return 0;

  /* One PAGEMAP_LENGTH-byte entry per virtual page, indexed by page number. */
  uintptr_t va = (uintptr_t) virtual_address;
  off_t offset = (off_t) (va / (uintptr_t) page_size * PAGEMAP_LENGTH);

  uint64_t pagemap_entry = 0;
  unsigned long physical_address = 0;

  if (lseek(fd, offset, SEEK_SET) == offset &&
      read(fd, &pagemap_entry, sizeof pagemap_entry) == (ssize_t) sizeof pagemap_entry &&
      (pagemap_entry & present_bit) != 0) {
    uint64_t pfn = pagemap_entry & pfn_mask;
    if (pfn != 0) {
      /* frame base + offset of the address inside its page */
      physical_address = (unsigned long) (pfn * (uint64_t) page_size
                                          + va % (uintptr_t) page_size);
    }
  }

  close(fd);
  return physical_address;
}

/*
 * Print the virtual address and the corresponding physical address (as
 * reported by get_physical_address) of a code symbol, a heap allocation and
 * a library function, then sleep for 60 seconds.
 *
 * Returns 0 on success, 1 if the heap allocation fails.
 */
int main() {
  void *ptr = malloc(1024);
  if (ptr == NULL) {
    perror("malloc");
    return 1;
  }

  /* %p and get_physical_address both take void *, so function pointers are
   * converted explicitly. */
  printf("main: VA %p -> PA 0x%lx \n", (void *) main, get_physical_address((void *) main));
  printf("ptr: VA %p -> PA 0x%lx \n", ptr, get_physical_address(ptr));
  printf("printf: VA %p -> PA 0x%lx \n", (void *) printf, get_physical_address((void *) printf));

  sleep(60);
  free(ptr);
  return 0;
}

如果你在编译 Kernel 的过程中没有启用 CONFIG_STRICT_DEVMEM (该选项会限制用户态对 /dev/mem 的访问) 的话，你可以进一步通过 /dev/mem 来修改物理地址对应的值。

写时复制

写时复制 (Copy-On-Write) 是现代操作系统中的一个重要机制。在多个进程同时请求相同资源时（例如，调用 fork() 时的内存空间复制），它们会首先共享这个资源，直到某个进程试图对该资源进行修改时才会为其创建一个私有副本 (private copy)。

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>

#define SIZE (1024LL * 1024 * 1024)

/*
 * Allocate and fill a 1 GB buffer, fork until up to 1000 processes exist,
 * and have every process overwrite one randomly chosen byte of the buffer
 * and print it together with its two neighbours. Each process then sleeps
 * for 30 seconds.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main() {
  // allocate 1GB memory space
  char *data = malloc(SIZE);
  if (data == NULL) {
    perror("malloc");
    return 1;
  }
  memset(data, '^', SIZE);

  for (int i = 0; i < 999; i++) {
    pid_t pid = fork();
    if (pid == 0)
      break;            // child: stop forking
    if (pid < 0) {
      perror("fork");   // cannot create more processes; carry on with what we have
      break;
    }
  }

  // up to 1000 processes go here
  // get a random number (stays 0 if /dev/urandom is unavailable)
  unsigned int idx = 0;
  int fd = open("/dev/urandom", O_RDONLY);
  if (fd >= 0) {
    if (read(fd, &idx, sizeof(idx)) != (ssize_t) sizeof(idx))
      idx = 0;
    close(fd);
  }
  // Keep idx in [1, SIZE - 2] so that data[idx-1] and data[idx+1] below
  // stay inside the buffer.
  idx = (unsigned int) (1 + idx % (SIZE - 2));

  data[idx] = '_';
  printf("pid = %d, write data[%u], %c%c%c\n", (int) getpid(), idx,
         data[idx-1], data[idx], data[idx+1]);

  sleep(30);
  return 0;
}

虚拟内存

只有当一个进程需要访问某个页时，操作系统才需要将这个页分配给进程 (或从磁盘调度入内存)。通过这样一种请求调页（Demand Paging）机制操作系统可以打破物理内存资源的限制。

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

#define GB (1024LL * 1024 * 1024)

/*
 * Map 1024 GB of anonymous private memory, print the mapping address, and
 * exit with status -1 (after printing the error text) if the mapping failed.
 * Otherwise store 10 at the 2 GB, 4 GB and 11 GB offsets, print the bytes
 * found at the 2 GB, 4 GB and 6 GB offsets, and sleep for 30 seconds.
 */
int main() {
  uint8_t *base = mmap(NULL, 1024*GB, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

  printf("ptr: %lx\n", (uintptr_t)base);
  if (base == MAP_FAILED) {
    printf("%s\n", strerror(errno));
    exit(-1);
  }

  // touch three widely separated pages
  base[2*GB] = 10;
  base[4*GB] = 10;
  base[11*GB] = 10;

  // read back two written pages and one page that was never written
  printf("ptr + 2*GB -> %d\n", base[2*GB]);
  printf("ptr + 4*GB -> %d\n", base[4*GB]);
  printf("ptr + 6*GB -> %d\n", base[6*GB]);

  sleep(30);
  return 0;
}

直接执行上述代码会发现 mmap 执行失败 (cannot allocate memory)，这是因为 Linux 虽然默认允许 Memory Overcommit，即允许进程使用超过物理内存大小的地址空间，但对于一些极端的内存请求情况会拒绝分配 (例如，一次请求就超过物理内存大小)。可以通过修改 overcommit_memory 值来改变系统行为：

echo 1 | sudo tee /proc/sys/vm/overcommit_memory