在阅读某大佬写的一篇blog,里面有提到LD_PRELOAD技术作为调试和测试手段,学习该知识点,主要参考文章:
Dynamic linker tricks: Using LD_PRELOAD to cheat, inject features and investigate programs

LD_PRELOAD用法

LD_PRELOAD是一个环境变量,用于指定需要预先加载的动态库:它所指定的库会先于其他所有动态库被加载,因此其中的符号优先级最高。一般情况下,动态库的查找顺序为 LD_PRELOAD > LD_LIBRARY_PATH > /etc/ld.so.cache > /lib > /usr/lib。程序中我们经常要调用一些外部库的函数,以rand为例,如果我们有个自定义的rand函数,把它编译成动态库后,通过LD_PRELOAD加载,那么当程序调用rand函数时,实际调用的就是我们自定义的函数,下面以一个例子说明。

示例代码:
random.c

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Seed the generator from the current time and print ten values in [0, 99]. */
int main(){
  int remaining;

  srand(time(NULL));

  /* Exactly ten draws, one per line. */
  for (remaining = 10; remaining > 0; remaining--) {
    int value = rand() % 100;
    printf("%d\n", value);
  }

  return 0;
}

执行结果:

[root preload]#gcc -o random  random.c
[root preload]#./random
69
36
52
0
15
24
72
34
71
84

示例代码:
unrandom.c

/* Stand-in for rand(): every call returns the same constant, 42. */
int rand(){
    const int the_answer = 42; /* the most random number in the universe */

    return the_answer;
}

使用如下命令将其编成动态库:

gcc -shared -fPIC unrandom.c -o unrandom.so

然后使用如下方式运行:

LD_PRELOAD=$PWD/unrandom.so ./random
或者
[root preload]#export LD_PRELOAD=$PWD/unrandom.so
[root preload]#./random
运行结果均为:
42
42
42
42
42
42
42
42
42
42

上面的例子说明,我们已经成功将rand函数替换为我们自己所编写的版本。
使用ldd可以查看在两种运行方式下所加载的动态库:直接运行时由于没有加载unrandom.so,因此会使用libc中原本的rand函数;如果我们指定了LD_PRELOAD=$PWD/unrandom.so,使用ldd查看时,所加载的so中就会出现我们自己实现的unrandom.so。由于LD_PRELOAD指定的库加载优先级最高,因此会优先使用unrandom.so中的rand函数。
使用nm -D可以列出动态库unrandom.so中的符号。

[root preload]#ldd random
    linux-vdso.so.1 =>  (0x00007fffbd7ec000)
    libc.so.6 => /lib64/libc.so.6 (0x00007fa2ea23d000)
    /lib64/ld-linux-x86-64.so.2 (0x00007fa2ea60a000)
[root preload]#
[root preload]#LD_PRELOAD=$PWD/unrandom.so ldd random
    linux-vdso.so.1 =>  (0x00007ffef61db000)
    /root/workspace/preload/unrandom.so (0x00007fde723b1000)
    libc.so.6 => /lib64/libc.so.6 (0x00007fde71fe4000)
    /lib64/ld-linux-x86-64.so.2 (0x00007fde725b3000)
[root preload]#
[root preload]#
[root preload]#nm -D unrandom.so
0000000000201018 B __bss_start
                 w __cxa_finalize
0000000000201018 D _edata
0000000000201020 B _end
0000000000000620 T _fini
                 w __gmon_start__
00000000000004f0 T _init
                 w _ITM_deregisterTMCloneTable
                 w _ITM_registerTMCloneTable
                 w _Jv_RegisterClasses
0000000000000615 T rand

下面的例子我们想封装一个open函数,在函数内部调用libc中的open函数来实现。

/*
 * Naive attempt at wrapping open(): the inner call is meant to reach the
 * implementation provided by libc.so.
 */
int open(const char *pathname, int flags){
  /* Some evil injected code goes here. */
  return open(pathname,flags); // Intended to call the "real" open from libc.so, but the name open refers to this very function, so the call re-enters the wrapper itself
}

如果我们这么写,函数内部调用的open解析到的仍然是我们自己定义的open,而不是libc中的open,这将导致无限递归调用。
如何在我们自己实现的库中调用真正的open函数呢?
inspect_open.c

#define _GNU_SOURCE
#include <dlfcn.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <sys/types.h>

/*
 * Signature of the next open() in the library search order. It is variadic so
 * that the optional third argument (the file mode) can be forwarded.
 */
typedef int (*orig_open_f_type)(const char *pathname, int flags, ...);

/*
 * Interposed open(): logs the requested path, then delegates to the next
 * open() found after this library in the search order (RTLD_NEXT).
 *
 * pathname  path being opened; logged as "(null)" when NULL
 * flags     open flags, forwarded unchanged
 * ...       file mode; read and forwarded only when flags request file
 *           creation (O_CREAT, or O_TMPFILE where available)
 *
 * Returns whatever the next open() returns. If that function cannot be
 * resolved, returns -1 with errno set to ENOSYS.
 */
int open(const char *pathname, int flags, ...)
{
    /* Resolved once and cached; the lookup result does not change between calls. */
    static orig_open_f_type orig_open = NULL;
    int needs_mode = (flags & O_CREAT) != 0;
    mode_t mode = 0;

#ifdef O_TMPFILE
    /* O_TMPFILE is a multi-bit flag, so compare against the full mask. */
    if ((flags & O_TMPFILE) == O_TMPFILE)
    {
        needs_mode = 1;
    }
#endif

    /*
     * The third argument only exists when the caller is creating a file;
     * reading it otherwise would fetch an argument that was never passed.
     */
    if (needs_mode)
    {
        va_list ap;

        va_start(ap, flags);
        /* Fetched as int: that is the promoted type of a variadic argument. */
        mode = (mode_t)va_arg(ap, int);
        va_end(ap);
    }

    /* Some evil injected code goes here. */
    printf("The victim used open(...) to access '%s'!!!\n",
           pathname != NULL ? pathname : "(null)");

    if (orig_open == NULL)
    {
        orig_open = (orig_open_f_type)dlsym(RTLD_NEXT, "open");
        if (orig_open == NULL)
        {
            /* Nothing to delegate to; fail the call instead of jumping to NULL. */
            errno = ENOSYS;
            return -1;
        }
    }

    if (needs_mode)
    {
        return orig_open(pathname, flags, mode);
    }
    return orig_open(pathname, flags);
}

使用如下方式生成 inspect_open.so

gcc -shared -fPIC -o inspect_open.so inspect_open.c  -ldl

RTLD_NEXT的man手册解释如下:

There are two special pseudo-handles, RTLD_DEFAULT and RTLD_NEXT. The former will find the first occurrence of the desired symbol using the default library search order. The latter will find the next occurrence of a function in the search order after the current library. This allows one to provide a wrapper around a function in another shared library.

man手册的解释非常清晰:RTLD_DEFAULT按照默认的库搜索顺序查找该符号第一次出现的位置,而RTLD_NEXT则从当前库之后开始,按搜索顺序查找该符号下一次出现的位置,因此可以用它来封装其他动态库中的同名函数。
open_example.c

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

/*
 * Open (creating it if necessary) the file named by the single command-line
 * argument and report whether open() succeeded.
 *
 * Returns 0 on success, 1 on a usage error or when open() fails.
 */
int main(int argc, char *argv[])
{
    int fd;

    if (argc != 2)
    {
        /* Name the program and the expected argument so the message is actionable. */
        printf("Usage : %s <filename>\n", argc > 0 ? argv[0] : "open_example");
        return 1;
    }

    fd = open(argv[1], O_RDONLY | O_CREAT, S_IRWXU);
    if (fd == -1)
    {
        /* errno is read immediately after the failing call, before anything can overwrite it. */
        printf("open() failed with error [%s]\n", strerror(errno));
        return 1;
    }

    printf("open() Successful.\n");

    /* Release the descriptor instead of relying on process exit. */
    close(fd);

    return 0;
}

使用如下方式编译

gcc -g -o open_example open_example.c

运行结果:

[root preload]#./open_example random.c
open() Successful.
[root preload]#LD_PRELOAD=$PWD/inspect_open.so ./open_example random.c
The victim used open(...) to access 'random.c'!!!
open() Successful.

hook kill函数

我们想查看系统中所有调用kill函数的地方,并添加相应的打印信息,从而定位是哪些进程(及其父进程)对哪些进程调用了kill函数、发送了什么信号。
函数my_hook_kill.c

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <dlfcn.h>

/* Signature of kill(), used for the function pointer obtained through dlsym(). */
typedef int(*KILL)(pid_t pid, int sig);

/* Size of every command-line buffer handed to get_cmd_by_pid(). */
#define TMP_BUF_SIZE 256

/*
 * Read the command line of process `pid` from /proc/<pid>/cmdline.
 *
 * pid  process whose command line is wanted
 * cmd  output buffer of at least TMP_BUF_SIZE bytes; ignored when NULL
 *
 * On return `cmd` always holds a NUL-terminated string. It is empty when the
 * file cannot be opened or nothing could be read (per proc(5), a zombie's
 * cmdline reads as 0 bytes). At most TMP_BUF_SIZE - 1 bytes are kept.
 */
void get_cmd_by_pid(pid_t pid, char *cmd)
{
    char path[TMP_BUF_SIZE];
    FILE *fp;
    size_t len;
    size_t i;

    if (cmd == NULL)
    {
        return;
    }

    /* Start from an empty string so every early return leaves valid output. */
    cmd[0] = '\0';

    snprintf(path, sizeof path, "/proc/%d/cmdline", (int)pid);
    fp = fopen(path, "r");
    if (fp == NULL)
    {
        return;
    }

    len = fread(cmd, 1, TMP_BUF_SIZE - 1, fp);
    fclose(fp);

    /* Terminate at the number of bytes actually read, not at the buffer end. */
    cmd[len] = '\0';

    /*
     * proc(5): the arguments appear in this file as a set of strings
     * separated by null bytes ('\0'), with a further null byte after the last
     * string. Turn the separators into spaces so the whole command line reads
     * as one string; the final byte is left alone so the trailing terminator
     * does not become a trailing space.
     */
    for (i = 0; i + 1 < len; i++)
    {
        if (cmd[i] == '\0')
        {
            cmd[i] = ' ';
        }
    }
}

/*
 * Interposed kill(): prints which process (and its parent) sends signal `sig`
 * to which target, then delegates to the next kill() found after this library
 * in the search order (RTLD_NEXT).
 *
 * Returns whatever the next kill() returns, or -1 when it cannot be resolved.
 */
int kill(pid_t pid, int sig)
{
    /* Resolved on first use and cached for later calls. */
    static KILL orign_kill = NULL;
    const pid_t self = getpid();
    const pid_t parent = getppid();

    /* Command line of the process the signal is sent to. */
    char buf_des[TMP_BUF_SIZE] = {0};
    get_cmd_by_pid(pid, buf_des);

    /* Command line of the calling process. */
    char buf_org[TMP_BUF_SIZE] = {0};
    get_cmd_by_pid(self, buf_org);

    /* Command line of the caller's parent. */
    char buf_porg[TMP_BUF_SIZE] = {0};
    get_cmd_by_pid(parent, buf_porg);

    printf("hook kill(sig:%d): [%s(%d) -> %s(%d)] -> [%s(%d)]\n",
           sig, buf_porg, (int)parent, buf_org, (int)self, buf_des, (int)pid);
    /*
     * Flush now: the signal may be aimed at the calling process itself, in
     * which case buffered output would never be written.
     */
    fflush(stdout);

    if (orign_kill == NULL)
    {
        orign_kill = (KILL)dlsym(RTLD_NEXT, "kill");
        if (orign_kill == NULL)
        {
            /* Report the failure instead of calling through a NULL pointer. */
            const char *why = dlerror();

            fprintf(stderr, "hook kill: cannot resolve the real kill(): %s\n",
                    why != NULL ? why : "symbol not found");
            return -1;
        }
    }

    return orign_kill(pid, sig);
}

使用如下命令编译成动态库:

gcc -shared -fPIC -o my_hook_kill.so my_hook_kill.c -ldl

singal_example.c

#include<stdio.h>
#include<signal.h>
#include<unistd.h>

/*
 * Signal handler: announces the arrival of SIGINT on standard output and
 * ignores every other signal number.
 *
 * signo  number of the signal being delivered
 */
void sig_handler(int signo)
{
  /*
   * write(2) is used instead of printf(): stdio is not async-signal-safe, so
   * calling it from a handler that interrupts another stdio call is unsafe.
   */
  static const char msg[] = "received SIGINT\n";

  if (signo == SIGINT) {
    ssize_t written = write(STDOUT_FILENO, msg, sizeof msg - 1);

    (void)written; /* nothing useful can be done about a failure in here */
  }
}

/*
 * Install sig_handler for SIGINT, then idle forever so that a signal can be
 * sent to this process from another shell.
 */
int main(void)
{
  /* A failed installation is only reported; the program keeps running. */
  if (SIG_ERR == signal(SIGINT, sig_handler)) {
    printf("\ncan't catch SIGINT\n");
  }

  /* Sleep in one-second steps, indefinitely. */
  for (;;) {
    sleep(1);
  }

  return 0;
}

使用如下方式编译和运行,并记住进程ID(此例中为2389)

gcc -g -o singal_example singal_example.c
[root hook_kill]#./singal_example &
[1] 2389

然后我们使用TLPI(The Linux Programming Interface)书中的一个例子来发送信号。该例子依赖tlpi_hdr.h等文件,如需编译运行,需要先下载该书的完整源代码:
t_kill.c

/*************************************************************************\
*                  Copyright (C) Michael Kerrisk, 2018.                   *
*                                                                         *
* This program is free software. You may use, modify, and redistribute it *
* under the terms of the GNU General Public License as published by the   *
* Free Software Foundation, either version 3 or (at your option) any      *
* later version. This program is distributed without any warranty.  See   *
* the file COPYING.gpl-v3 for details.                                    *
\*************************************************************************/

/* Listing 20-3 */

/* t_kill.c

   Send a signal using kill(2) and analyze the return status of the call.
*/
#include <signal.h>
#include "tlpi_hdr.h"

/*
 * Usage: <prog> pid sig-num
 *
 * Sends signal sig-num to process pid with kill(). For a non-zero signal a
 * failure is reported through errExit(). For signal 0 (the null signal) the
 * outcome of the call is translated into a message about whether the process
 * exists.
 */
int
main(int argc, char *argv[])
{
    int signum, rc;

    if (argc != 3 || strcmp(argv[1], "--help") == 0)
        usageErr("%s pid sig-num\n", argv[0]);

    signum = getInt(argv[2], 0, "sig-num");
    rc = kill(getLong(argv[1], 0, "pid"), signum);

    if (signum == 0) {          /* Null signal: process existence check */
        if (rc == 0) {
            printf("Process exists and we can send it a signal\n");
        } else {
            switch (errno) {
            case EPERM:
                printf("Process exists, but we don't have "
                       "permission to send it a signal\n");
                break;
            case ESRCH:
                printf("Process does not exist\n");
                break;
            default:
                errExit("kill");
            }
        }
    } else if (rc == -1) {      /* A real signal could not be sent */
        errExit("kill");
    }

    exit(EXIT_SUCCESS);
}

执行结果:

如果不使用我们编写的kill函数,运行结果如下:
[root signals]# ./t_kill 2389 2
received SIGINT

使用我们编写的kill函数,运行结果如下:
[root signals]#LD_PRELOAD=/root/workspace/hook/hook_kill/my_hook_kill.so ./t_kill 2389 2
hook kill(sig:2): [-bash(2229) -> ./t_kill 2389 2(2401)] -> [./singal_example(2389)]
received SIGINT