线程死锁检测组件逻辑与源码

发布时间：2024年01月02日

死锁介绍

任务的执行体之间互相持有对方所需的资源而不释放，形成了相互制约而都无法继续执行任务的情况，被称为“死锁”。

死锁案例

线程A持有锁a不释放，需要去获取锁b才能继续执行任务，

线程B持有锁b不释放，需要去获取锁c才能继续执行任务，

线程C持有锁c不释放，需要去获取锁d才能继续执行任务，

线程D持有锁d不释放，需要去获取锁a才能继续执行任务。

线程ABCD陷入了逻辑套死，形成了一个环，因此谁都无法继续执行任务。

如何判断形成死锁

判断形成死锁，只需要解决问题：即能判断线程和线程之后互相持有资源不释放，形成了一个环。

如何判断形成环

需要存储两种关系。1：锁和线程之间的关系。2、线程和线程之间的关系。

针对1，锁和线程之间的关系是多对一的，即，一个线程持有多个锁，但是一个锁最多只能能被一个线程持有。我们可以构建一个结构体数组存储这些关系，一个结构体存储着锁地址和线程id，一个结构体代表着一种关系。

针对2，线程和线程之间的关系，表示一个线程想要获取的资源，是否被另一个线程持有？如果被另一个线程持有，那么是哪个线程？需要用有向图记录，有向图有很多种记录方式，在编程中最常体现为邻接表。如图：

这个就是一个邻接表，线程tA想持有的资源在tB中，线程tB想持有的资源在tC中，以此类推，形成了一个环，也就是一个死锁。邻接表的结构是多个链表的头节点被用数组的方式串起来。

死锁检测组件编写逻辑

主要需要解决一下3块逻辑

线程获取锁

线程解锁

线程如果解锁，需要从锁-持有方数组中删除该关系。若邻接表中记录了其他线程在向该线程索取资源，那么删除也删除该关系。

监控环形成

需要另起一个线程负责监控邻接表，定期检查线程之间是否形成了死锁关系，如果形成了死锁关系，那么将这种关系输出到某处，提示程序员需要修改逻辑bug。

检测组件代码以及应用

代码比较长，了解了逻辑之后可以，可以直接在需要检测的函数的最前面调用init_hook()和start_check()这两个接口，以及提前配置好这两个接口所依赖的各层级函数、变量、定义等。

#define _GNU_SOURCE
#include <dlfcn.h>


#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>

#include <stdint.h>


#if 1


typedef unsigned long int uint64;


#define MAX		100

enum Type {PROCESS, RESOURCE};

struct source_type {

	uint64 id;
	enum Type type;

	uint64 lock_id;
	int degress;
};

struct vertex {

	struct source_type s;
	struct vertex *next;

};

struct task_graph {

	struct vertex list[MAX]; //邻接表头节点数组
	int num;//邻接表包含的链表的数量，也就是头节点数量

	struct source_type locklist[MAX]; //锁-持有方数组
	int lockidx; //锁-持有关系的数量

	pthread_mutex_t mutex;
};

struct task_graph *tg = NULL; 
int path[MAX+1];
int visited[MAX];
int k = 0;
int deadlock = 0;
type
struct vertex *create_vertex(struct source_type type) { //其实就是把传入进来的type在堆空间复制了一份并且返回

	struct vertex *tex = (struct vertex *)malloc(sizeof(struct vertex ));

	tex->s = type;
	tex->next = NULL;

	return tex;

}


int search_vertex(struct source_type type) { //查找所需的头节点在邻接表数组中的位置

	int i = 0;

	for (i = 0;i < tg->num;i ++) {

		if (tg->list[i].s.type == type.type && tg->list[i].s.id == type.id) {
			return i;
		}

	}

	return -1;
}

void add_vertex(struct source_type type) { //增加邻接表数组的元素，也就是又来了一个链表要记录了

	if (search_vertex(type) == -1) {

		tg->list[tg->num].s = type;
		tg->list[tg->num].next = NULL;
		tg->num ++;

	}

}


int add_edge(struct source_type from, struct source_type to) { //给邻接表中某只链表增加节点，表示头节点对应的线程所需要的资源又被持有了

	add_vertex(from);
	add_vertex(to);

	struct vertex *v = &(tg->list[search_vertex(from)]);

	while (v->next != NULL) {
		v = v->next;
	}

	v->next = create_vertex(to);

}


int verify_edge(struct source_type i, struct source_type j) { //验证头节点对应的线程是不是在重复持有某个锁

	if (tg->num == 0) return 0;

	int idx = search_vertex(i);
	if (idx == -1) {
		return 0;
	}

	struct vertex *v = &(tg->list[idx]);

	while (v != NULL) {

		if (v->s.id == j.id) return 1;

		v = v->next;
		
	}

	return 0;

}


int remove_edge(struct source_type from, struct source_type to) { //移除邻接表的某个链表的节点，表示线程和线程之间的关系解除

	int idxi = search_vertex(from);
	int idxj = search_vertex(to);

	if (idxi != -1 && idxj != -1) {

		struct vertex *v = &tg->list[idxi];
		struct vertex *remove;

		while (v->next != NULL) {

			if (v->next->s.id == to.id) {

				remove = v->next;
				v->next = v->next->next;

				free(remove);
				break;

			}

			v = v->next;
		}

	}

}


void print_deadlock(void) { //如果形成了死锁，那么打印环

	int i = 0;

	printf("cycle : ");
	for (i = 0;i < k-1;i ++) {

		printf("%ld --> ", tg->list[path[i]].s.id);

	}

	printf("%ld\n", tg->list[path[i]].s.id);

}

int DFS(int idx) {

	struct vertex *ver = &tg->list[idx];
	if (visited[idx] == 1) {

		path[k++] = idx;
		print_deadlock();
		deadlock = 1;
		
		return 0;
	}

	visited[idx] = 1;
	path[k++] = idx;

	while (ver->next != NULL) {

		DFS(search_vertex(ver->next->s));
		k --;
		
		ver = ver->next;

	}

	
	return 1;

}


int search_for_cycle(int idx) { //查询是否形成了环

	

	struct vertex *ver = &tg->list[idx];
	visited[idx] = 1;
	k = 0;
	path[k++] = idx;

	while (ver->next != NULL) {

		int i = 0;
		for (i = 0;i < tg->num;i ++) {
			if (i == idx) continue;
			
			visited[i] = 0;
		}

		for (i = 1;i <= MAX;i ++) {
			path[i] = -1;
		}
		k = 1;

		DFS(search_vertex(ver->next->s));
		ver = ver->next;
	}

}




#endif

// 



#if 1

int search_lock(uint64 lock) { //

	int i = 0;
	
	for (i = 0;i < tg->lockidx;i ++) {
		
		if (tg->locklist[i].lock_id == lock) {
			return i;
		}
	}

	return -1;
}

int search_empty_lock(uint64 lock) {

	int i = 0;
	
	for (i = 0;i < tg->lockidx;i ++) {
		
		if (tg->locklist[i].lock_id == 0) {
			return i;
		}
	}

	return tg->lockidx;

}



void lock_before(uint64_t tid, uint64_t lockaddr) {
	/*
	1. 	if (lockaddr) {
			tid --> lockaddr.tid;
	   	}
	*/

	int idx = 0;

	for (idx = 0;idx < tg->lockidx;idx ++) {

		if (tg->locklist[idx].lock_id == lockaddr) { // 

			struct source_type from;
			from.id = tid;
			from.type = PROCESS;
			add_vertex(from);

			struct source_type to;
			to.id = tg->locklist[idx].id;
			to.type = PROCESS;
			add_vertex(to);

			
			tg->locklist[idx].degress ++;

			if (!verify_edge(from, to))
				add_edge(from, to);

		}

	}
	
	
}

void lock_after(uint64_t tid, uint64_t lockaddr) {

	/*
		if (!lockaddr) {

			tid --> lockaddr;

		} else {

			lockaddr.tid = tid;
			tid -> lockaddr;

		}
		
	 */
	int idx = 0;
	if (-1 == (idx = search_lock(lockaddr))) {// 

		int eidx = search_empty_lock(lockaddr);

		tg->locklist[eidx].id = tid;
		tg->locklist[eidx].lock_id = lockaddr;

		tg->lockidx ++;
		
	} else {

		struct source_type from;
		from.id = tid;
		from.type = PROCESS;
		add_vertex(from);

		struct source_type to;
		to.id = tg->locklist[idx].id;
		to.type = PROCESS;
		add_vertex(to);

		
		tg->locklist[idx].degress --;

		if (verify_edge(from, to))
			remove_edge(from, to);

		tg->locklist[idx].id = tid;
		
	}
	 
	
}


void unlock_after(uint64_t tid, uint64_t lockaddr) {

	// lockaddr.tid = 0;

	int idx = search_lock(lockaddr);

	if (tg->locklist[idx].degress == 0) {
		tg->locklist[idx].id = 0;
		tg->locklist[idx].lock_id = 0;
	}
	
}


void check_dead_lock(void) {

	int i = 0;

	deadlock = 0;
	for (i = 0;i < tg->num;i ++) {
		if (deadlock == 1) break;
		search_for_cycle(i);
	}

	if (deadlock == 0) {
		printf("no deadlock\n");
	}

}


static void *thread_routine(void *args) {

	while (1) {

		sleep(5);
		check_dead_lock();

	}

}


void start_check(void) {

	tg = (struct task_graph*)malloc(sizeof(struct task_graph));
	tg->num = 0;
	tg->lockidx = 0;
	
	pthread_t tid;

	pthread_create(&tid, NULL, thread_routine, NULL);

}


// hook
// define
typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);
pthread_mutex_lock_t pthread_mutex_lock_f = NULL;

typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);
pthread_mutex_unlock_t pthread_mutex_unlock_f = NULL;


// implement
int pthread_mutex_lock(pthread_mutex_t *mutex) {

	
	pthread_t selfid = pthread_self();
	
	lock_before((uint64_t)selfid, (uint64_t)mutex);
	
	pthread_mutex_lock_f(mutex);

	lock_after((uint64_t)selfid, (uint64_t)mutex);

	
}

int pthread_mutex_unlock(pthread_mutex_t *mutex) {

	pthread_mutex_unlock_f(mutex);

	pthread_t selfid = pthread_self();
	unlock_after((uint64_t)selfid, (uint64_t)mutex);
	
}

// init
void init_hook(void) {

	if (!pthread_mutex_lock_f)
		pthread_mutex_lock_f = dlsym(RTLD_NEXT, "pthread_mutex_lock");

	if (!pthread_mutex_unlock_f)
		pthread_mutex_unlock_f = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
	
}



#endif

// 

#if 1 //sample

pthread_mutex_t r1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t r2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t r3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t r4 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t r5 = PTHREAD_MUTEX_INITIALIZER;



void *t1_cb(void *arg) {

	printf("t1: %ld\n", pthread_self());

	pthread_mutex_lock(&r1);
	sleep(1);
	pthread_mutex_lock(&r2);


	
	pthread_mutex_unlock(&r2);

	pthread_mutex_unlock(&r1);

}

void *t2_cb(void *arg) {

	printf("t2: %ld\n", pthread_self());

	pthread_mutex_lock(&r2);
	sleep(1);
	pthread_mutex_lock(&r3);

	pthread_mutex_unlock(&r3);
	pthread_mutex_unlock(&r2);

}

void *t3_cb(void *arg) {

	printf("t3: %ld\n", pthread_self());

	pthread_mutex_lock(&r3);
	sleep(1);
	pthread_mutex_lock(&r4);

	pthread_mutex_unlock(&r4);
	pthread_mutex_unlock(&r3);

}

void *t4_cb(void *arg) {

	printf("t4: %ld\n", pthread_self());

	pthread_mutex_lock(&r4);
	sleep(1);
	pthread_mutex_lock(&r5);

	pthread_mutex_unlock(&r5);
	pthread_mutex_unlock(&r4);

}

void *t5_cb(void *arg) {

	printf("t5: %ld\n", pthread_self());

	pthread_mutex_lock(&r1);
	sleep(1);
	pthread_mutex_lock(&r5);

	pthread_mutex_unlock(&r5);
	pthread_mutex_unlock(&r1);

}






// deadlock
// 

int main() {

	init_hook();  //重载lock与unlock函数，保留原有功能的基础上增加lock_before.lock_after,unlock_after操作
	start_check(); //另起一个线程定期检查是否形成死锁

	//形成了死锁
	pthread_t t1, t2, t3, t4, t5;

	pthread_create(&t1, NULL, t1_cb, NULL);
	pthread_create(&t2, NULL, t2_cb, NULL);

	
	
	pthread_create(&t3, NULL, t3_cb, NULL);
	pthread_create(&t4, NULL, t4_cb, NULL);
	pthread_create(&t5, NULL, t5_cb, NULL);


	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	pthread_join(t3, NULL);
	pthread_join(t4, NULL);
	pthread_join(t5, NULL);

	printf("complete\n");

}

#endif

文章来源:https://blog.csdn.net/weixin_73240939/article/details/135348191
本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若内容造成侵权/违法违规/事实不符，请联系我的编程经验分享网邮箱：chenni525@qq.com进行投诉反馈，一经查实，立即删除！