Memory Management
地址空间
Linux 使用 mm_struct
(memory descriptor) 来描述进程的虚拟地址空间。可以使用 pmap
工具或从 /proc/{PID}/maps
中查看进程地址空间中各 area 的相关信息。
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Pointer to a string literal; printed below to show where the literal is mapped. */
const char *message = "hello world";

/*
 * Print the addresses of several kinds of objects (the main function, the
 * argument vector and its strings, a string literal, a local variable and
 * two malloc'ed blocks), then sleep for 20 seconds so the process can be
 * inspected with pmap or /proc/PID/maps while it is still alive.
 *
 * Returns 0.
 */
int main(int argc, char *argv[]) {
    int i = 0;
    void *ptr1 = malloc(1024);
    void *ptr2 = malloc(1024);

    /* %p expects a void * argument, so every pointer is cast explicitly. */
    printf("main: %p\n", (void *)main);
    printf("argv: %p\n", (void *)argv);
    for (int x = 0; x < argc; x++) {
        printf("\t[%s]: %p\n", argv[x], (void *)argv[x]);
    }
    printf("message: %p\n", (const void *)message);
    printf("i: %p\n", (void *)&i);
    /* A failed malloc simply shows up as a null pointer in the output. */
    printf("ptr1: %p\n", ptr1);
    printf("ptr2: %p\n", ptr2);

    free(ptr1);
    free(ptr2);

    /* Keep the process around long enough to look at its mappings. */
    sleep(20);
    return 0;
}
更进一步,可以通过 /proc/{PID}/mem
来修改一个正在运行进程的地址空间中的信息 (例如,让如下进程打印的 hello
字符串变成 bye
)。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Print the process id once, then print a heap-allocated copy of "hello"
 * together with its address every two seconds, forever. The string lives on
 * the heap (strdup) so that an external tool can find and overwrite it.
 *
 * Returns EXIT_FAILURE if the string cannot be allocated; otherwise the
 * loop never terminates.
 */
int main(void) {
    printf("pid: %d\n", (int)getpid());

    char *s = strdup("hello");
    if (s == NULL) {
        /* Passing NULL to %s below would be undefined behaviour. */
        perror("strdup");
        return EXIT_FAILURE;
    }

    for (;;) {
        printf("%s: %p\n", s, (void *)s);
        sleep(2);
    }

    return 0; /* not reached */
}
'''
Locate and replace all occurrences of a string in the heap of a process
Usage: ./hack.py [PID] [search_string] [replace_by_string]
'''
import sys
import re


def find_heap_range(maps_lines):
    """Return (start, end) of the [heap] mapping, or None if there is none.

    maps_lines is any iterable of lines in /proc/PID/maps format; the first
    field of a line is "start-end" in hexadecimal and the last field is the
    mapping name.
    """
    for line in maps_lines:
        fields = line.split()
        if not fields or fields[-1] != "[heap]":
            continue
        start, end = fields[0].split("-")
        return int(start, 16), int(end, 16)
    return None


def find_occurrences(heap, search_string):
    """Return the offsets of every literal occurrence of search_string in heap.

    heap is a bytes object. search_string is escaped before being handed to
    the regex engine so that characters such as '.' or '*' match literally.
    """
    pattern = re.escape(search_string.encode("ascii"))
    return [m.start() for m in re.finditer(pattern, heap)]


def main(argv):
    """Overwrite every occurrence of argv[2] in the heap of process argv[1].

    Returns 0 on success and 1 on a usage error or when the target process
    has no [heap] mapping.
    """
    # parse args
    if len(argv) != 4 or not argv[2]:
        print("Usage: {} [PID] [search_string] [replace_by_string]".format(argv[0]))
        return 1
    pid = int(argv[1])
    search_string = str(argv[2])
    write_string = str(argv[3])
    maps_filename = "/proc/{}/maps".format(pid)
    mem_filename = "/proc/{}/mem".format(pid)

    # get start and end of the heap in the virtual memory
    with open(maps_filename, 'r') as maps_file:
        heap_range = find_heap_range(maps_file)
    if heap_range is None:
        print("[*] No [heap] mapping found for pid {}".format(pid))
        return 1
    addr_start, addr_end = heap_range
    print("[*] Heap Addr start [{:x}] | end [{:x}]".format(addr_start, addr_end))

    # open and read mem file
    with open(mem_filename, 'rb+') as mem_file:
        mem_file.seek(addr_start)
        heap = mem_file.read(addr_end - addr_start)

        # find all occurrences of the string
        loc = find_occurrences(heap, search_string)
        for each in loc:
            print("[*] Found '{}' at {:x}".format(search_string, each))

        # write the new string, NUL-terminated so the target sees a C string.
        # NOTE: a replacement longer than the original string overwrites
        # whatever follows it in the target's heap.
        for each in loc:
            mem_file.seek(addr_start + each)
            mem_file.write(bytes(write_string, "ASCII") + b'\x00')
            print("[*] Writing '{}' at {:x}".format(write_string, addr_start + each))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
代码共享
为了提高内存资源利用效率,在创建同一个应用程序的多个进程时,操作系统应该只在物理内存中存放一份只读的代码段,并由这些进程共享。
#include <stdio.h>
#include <unistd.h>
/*
 * Embed 128 MiB of filler bytes in the code of this function, print the
 * process id and sleep for 30 seconds, so that several instances can be
 * started and their memory usage compared.
 *
 * Returns 0.
 */
int main(void) {
    /*
     * 128MB of NOP (assembly instruction that does nothing): 0x90 is the
     * one-byte NOP opcode on x86, so this demo is x86-specific. The bytes
     * are emitted inline and are executed before the printf below.
     * __asm__ is used instead of asm because the plain keyword is not
     * recognised when compiling in a strict ISO mode such as -std=c11.
     */
    __asm__ volatile(".fill (128 << 20), 1, 0x90");

    printf("pid = %d\n", (int)getpid());
    sleep(30);
    return 0;
}
地址转换
Linux 使用分页机制来进行虚拟地址到物理地址的转换,其中 /proc/{PID}/pagemap
提供了用户程序访问 Page Table 的接口,通过其中的信息我们可以查询每个 VPN (Virtual Page Number) 对应的 PFN (Page Frame Number) 并计算给定虚拟地址对应的物理地址 (注意:自 Linux 4.2 起,没有 CAP_SYS_ADMIN 权限的进程读到的 PFN 字段为 0,因此下面的程序需要以 root 身份运行)。
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
/* Size in bytes of one entry in /proc/PID/pagemap (one entry per virtual page). */
#define PAGEMAP_LENGTH 8
/* log2 of a 4 KiB page; kept for reference, the code below asks sysconf() instead. */
#define PAGE_SHIFT 12

/*
 * Translate a virtual address of the calling process into a physical
 * address by looking up its page in /proc/self/pagemap.
 *
 * virtual_address: any address inside the calling process.
 *
 * Returns the physical address, or 0 when it cannot be determined: the
 * pagemap file cannot be opened or read, the page is not present in RAM,
 * or the kernel reports a page frame number of 0 (which is what readers
 * without sufficient privileges get).
 */
unsigned long get_physical_address(void *virtual_address) {
    /* Layout of a pagemap entry: bits 0-54 hold the PFN, bit 63 is "page present". */
    const uint64_t pfn_mask = (UINT64_C(1) << 55) - 1;
    const uint64_t present_bit = UINT64_C(1) << 63;

    // get the page size for the system
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        return 0;
    }

    // get offset in the pagemap file: one PAGEMAP_LENGTH-byte entry per virtual page
    uintptr_t va = (uintptr_t)virtual_address;
    off_t offset = (off_t)(va / (uintptr_t)page_size * PAGEMAP_LENGTH);

    // open the pagemap file for the current process
    int fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd == -1) {
        return 0;
    }

    unsigned long physical_address = 0;
    uint64_t pagemap_entry = 0;

    // seek to the correct position and read the entry from the pagemap file
    if (lseek(fd, offset, SEEK_SET) != (off_t)-1 &&
        read(fd, &pagemap_entry, sizeof pagemap_entry) == (ssize_t)sizeof pagemap_entry &&
        (pagemap_entry & present_bit) != 0) {
        // extract the page frame number, which is in bits 0-54 of pagemap entry
        uint64_t pfn = pagemap_entry & pfn_mask;
        if (pfn != 0) {
            // physical address = frame base + offset of the address within its page
            physical_address =
                (unsigned long)(pfn * (uint64_t)page_size + va % (uintptr_t)page_size);
        }
    }

    close(fd);
    return physical_address;
}
/*
 * Show the virtual-to-physical translation for two addresses of this
 * process: the main function itself and a freshly malloc'ed 1024-byte
 * block. Afterwards the process idles forever so it can be inspected.
 */
int main() {
    void *code_addr = main;
    void *heap_addr = malloc(1024);

    unsigned long code_pa = get_physical_address(code_addr);
    printf("main: VA %p -> PA 0x%lx \n", code_addr, code_pa);

    unsigned long heap_pa = get_physical_address(heap_addr);
    printf("ptr: VA %p -> PA 0x%lx \n", heap_addr, heap_pa);

    /* Stay alive; the loop never exits. */
    for (;;) {
        sleep(1);
    }
    return 0;
}
如果你在编译 Kernel 的过程中没有启用 CONFIG_STRICT_DEVMEM
的话,你可以进一步通过 /dev/mem
来修改物理地址对应的值 (启用该选项后,内核会禁止通过 /dev/mem 访问普通内存)。
写时复制
写时复制 (Copy-On-Write) 是现代操作系统中的一个重要机制。在多个调用者同时请求相同资源时(例如,内存空间),他们会首先共享这个资源,直到某个调用者试图对该资源进行修改时才会为其创建一个私有副本 (private copy)。因此,使用 fork()
创建大量子进程时并不会花费太多时间和占用太多内存资源。
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
/* Size of the shared buffer: 128 MiB. */
#define SIZE (128 << 20)

/*
 * Allocate and fill a 128 MiB buffer, fork 999 children that all inherit
 * it, then let each of the 1000 processes overwrite one randomly chosen
 * byte of its own view of the buffer and report it. Every process sleeps
 * for 20 seconds afterwards so memory usage can be observed.
 *
 * Returns 0, or EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(void) {
    // allocate 128MB memory space
    char *data = malloc(SIZE);
    if (data == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }
    memset(data, '^', SIZE);

    for (int i = 0; i < 999; i++) {
        pid_t pid = fork();
        if (pid == 0) {
            break; /* child: leave the loop so only the parent keeps forking */
        }
        if (pid == -1) {
            /* Out of processes/memory: carry on with the children we have. */
            perror("fork");
            break;
        }
    }

    // 1000 processes go here (the parent plus up to 999 children)
    unsigned int idx = 0;
    int fd = open("/dev/urandom", O_RDONLY);
    if (fd == -1 || read(fd, &idx, sizeof idx) != (ssize_t)sizeof idx) {
        /* No randomness available: fall back to a per-process value. */
        idx = (unsigned int)getpid();
    }
    if (fd != -1) {
        close(fd);
    }

    /*
     * Restrict idx to [1, SIZE - 2] so that both neighbours printed below,
     * data[idx - 1] and data[idx + 1], are inside the buffer.
     */
    idx = 1 + idx % (SIZE - 2);

    data[idx] = '_';
    printf("pid = %d, write data[%u], %c%c%c\n", (int)getpid(), idx,
           data[idx - 1], data[idx], data[idx + 1]);

    sleep(20);
    return 0;
}
虚拟内存
只有当一个进程需要访问某个页时,操作系统才需要将这个页分配给进程 (或从磁盘调入内存)。通过这样一种请求调页 (Demand Paging) 机制,操作系统可以打破物理内存资源的限制。
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <stdint.h>
#include <unistd.h>
/* One gibibyte, as a long long so that multiples do not overflow int. */
#define GB (1024LL * 1024 * 1024)

/*
 * Map 16 GB of private anonymous memory, write a single byte at a few
 * widely spaced offsets and read some bytes back.
 *
 * Exits with status -1 if the mapping cannot be created; returns 0
 * otherwise. Assumes a 64-bit address space (16 GB must fit in size_t).
 */
int main(void) {
    const size_t length = 16 * GB;

    uint8_t *ptr = mmap(NULL, length, PROT_READ | PROT_WRITE,
                        MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    /* Check for failure before the pointer is used or reported. */
    if (ptr == MAP_FAILED) {
        printf("mmap failed\n");
        exit(-1);
    }
    /* %lx expects unsigned long, so convert the pointer value explicitly. */
    printf("ptr: %lx\n", (unsigned long)(uintptr_t)ptr);

    *(ptr + 2*GB) = 10;
    *(ptr + 4*GB) = 10;
    *(ptr + 7*GB) = 10;

    printf("ptr + 2*GB -> %d\n", *(ptr + 2*GB));
    printf("ptr + 4*GB -> %d\n", *(ptr + 4*GB));
    /*
     * NOTE(review): the byte at 6 GB is never written (the third write
     * above goes to 7 GB), so this reads an untouched byte of the
     * anonymous mapping. Confirm whether 7*GB was intended here.
     */
    printf("ptr + 6*GB -> %d\n", *(ptr + 6*GB));

    munmap(ptr, length);
    return 0;
}