Circular debugging using ptrace results in deadlock due to race condition?
Linux - Kernel: This forum is for all discussion relating to the Linux kernel.
Notices
Welcome to LinuxQuestions.org, a friendly and active Linux Community.
You are currently viewing LQ as a guest. By joining our community you will have the ability to post topics, receive our newsletter, use the advanced search, subscribe to threads and access many other special features. Registration is quick, simple and absolutely free. Join our community today!
Note that registered members see fewer ads, and ContentLink is completely disabled once you log in.
If you have any problems with the registration process or your account login, please contact us. If you need to reset your password, click here.
Having a problem logging in? Please visit this page to clear all LQ-related cookies.
Get a virtual cloud desktop with the Linux distro that you want in less than five minutes with Shells! With over 10 pre-installed distros to choose from, the worry-free installation life is here! Whether you are a digital nomad or just looking for flexibility, Shells can put your Linux machine on the device that you want to use.
Exclusive for LQ members, get up to 45% off per month. Click here for more info.
Circular debugging using ptrace results in deadlock due to race condition?
Hi guys,
As part of my personal research I am facing a challenging problem.
I am trying to let two processes be each other's debuggers using the ptrace syscall. However, my proof-of-concept implementation always results in a deadlock state (both processes get stuck in 't+' state as shown by 'ps aux').
Here is my code, it's pretty simple:
Code:
/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>
#include <signal.h>
#include <assert.h>
/* Function type returning void with an unspecified parameter list; not referenced anywhere in this listing. */
typedef void fun_moved_from_context();
// using namespace std;
/*
 * Attach to `pid` as its tracer using PTRACE_ATTACH and log the result.
 *
 * pid: process to attach to.
 * id:  label of the calling process, printed in the log line.
 *
 * When ptrace() returns -1 the failure reason is reported through perror().
 */
void attachTo(pid_t pid, char* id) {
    long ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL);
    /* Capture errno immediately: the printf() below is allowed to change it. */
    int saved_errno = errno;

    printf("\t%s\tattachTo: %ld\n", id, ret);
    if (ret == -1) {
        errno = saved_errno;
        perror("err: ");
    }
}
/*
 * Attach to `pid` with PTRACE_SEIZE (attach without stopping the target)
 * and log the result.
 *
 * pid: process to seize.
 * id:  label of the calling process, printed in the log line.
 *
 * ptrace() returns 0 on success for this request, so success is asserted as
 * `ret == 0`. The result (and the error, if any) is logged before the
 * assertion so that a failure is visible in the output.
 */
void seizeTo(pid_t pid, char* id) {
    long ret = ptrace(PTRACE_SEIZE, pid, NULL, NULL);
    /* Capture errno immediately: the printf() below is allowed to change it. */
    int saved_errno = errno;

    printf("\t%s\tseizeTo: %ld\n", id, ret);
    if (ret == -1) {
        errno = saved_errno;
        perror("err: ");
    }
    assert(ret == 0);
}
/*
 * Issue PTRACE_DETACH for `pid` and log the value ptrace() returned.
 *
 * pid: tracee to detach from.
 * id:  label of the calling process, printed in the log line.
 */
void detachFrom(pid_t pid, char* id) {
    const long rc = ptrace(PTRACE_DETACH, pid, NULL, NULL);

    fprintf(stdout, "\t%s\tdetachFrom: %ld\n", id, rc);
}
/*
 * Enable event reporting on tracee `pid` for fork, vfork, clone, exec and
 * exit via PTRACE_SETOPTIONS, then log the value ptrace() returned.
 *
 * pid: tracee whose options are set.
 * id:  label of the calling process, printed in the log line.
 */
void setOptions(pid_t pid, char* id) {
    const long wanted = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
                        PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                        PTRACE_O_TRACEEXIT;
    const long rc = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)wanted);

    printf("\t%s\tsetOptions: %ld\n", id, rc);
}
/*
 * Issue PTRACE_POKEDATA on tracee `pid`, passing `can_run` as the address
 * argument and `data` as the value argument, then log ptrace()'s return.
 *
 * pid:     tracee whose memory is written.
 * can_run: address (interpreted in the tracee) that receives the value.
 * data:    value to store, carried in a pointer-typed argument.
 * id:      label of the calling process, printed in the log line.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, char* id) {
    void* target = (void*)can_run;
    const long rc = ptrace(PTRACE_POKEDATA, pid, target, data);

    printf("\t%s\tsetVarData: %ld\n", id, rc);
}
/*
 * Resume tracee `pid` with PTRACE_CONT (NULL data argument) and log the
 * value ptrace() returned.
 *
 * pid: stopped tracee to resume.
 * id:  label of the calling process, printed in the log line.
 */
void cont(pid_t pid, char* id) {
    const long rc = ptrace(PTRACE_CONT, pid, NULL, NULL);

    fprintf(stdout, "\t%s\tcont: %ld\n", id, rc);
}
/*
 * Issue PTRACE_INTERRUPT for tracee `pid` and log the value ptrace()
 * returned.
 *
 * pid: tracee to interrupt.
 * id:  label of the calling process, printed in the log line.
 */
void interrupt(pid_t pid, char* id) {
    const long rc = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

    fprintf(stdout, "\t%s\tinterrupt: %ld\n", id, rc);
}
/*
 * Tracer event loop: repeatedly wait() for a stop notification and react.
 *
 * id: numeric label of the calling process; it is formatted once into a
 *     string because the log lines and cont() in this listing expect a
 *     string label.
 *
 * Handling per stop:
 *   - wait() fails (-1):            log and return.
 *   - not a stop (e.g. exit status): ignore and wait again.
 *   - stop signal other than SIGTRAP: resume the tracee with cont().
 *   - SIGTRAP with fork event:      log and resume.
 *   - SIGTRAP with exit event:      log and return.
 *   - plain SIGTRAP (no event):     advance the saved PC and resume.
 *   - any other ptrace event:       resume without touching registers.
 */
void debug(int id) {
    char tag[16];
    snprintf(tag, sizeof tag, "%d", id);

    while (true) {
        int status;

        printf("\t%s\twhile\n", tag);
        sleep(1);

        pid_t recv = wait(&status);
        if (recv == -1) {
            printf("\t%s\tDebugger exiting\n", tag);
            return;
        }
        if (!WIFSTOPPED(status)) {
            continue;
        }
        if (WSTOPSIG(status) != SIGTRAP) {
            /* Without this the tracee would stay stopped forever. */
            cont(recv, tag);
            continue;
        }

        /*
         * For a ptrace event stop, status >> 8 equals
         * SIGTRAP | (event << 8); removing SIGTRAP leaves event << 8,
         * and 0 for a SIGTRAP that carries no event.
         */
        int event_code = (status >> 8) ^ SIGTRAP;
        switch (event_code) {
        case PTRACE_EVENT_FORK << 8:
            printf("\t%s\tFORK EVENT.\n", tag);
            cont(recv, tag);
            break;
        case PTRACE_EVENT_EXIT << 8:
            printf("\t%s\t%li exited.\n", tag, (long)recv);
            return;
        case 0: {
            struct user_regs regs;

            printf("\t%s\trecv: %i ; status: %i\n", tag, (int)recv, status);
            if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
                /* Never write back a register set that was not read. */
                perror("err: ");
                cont(recv, tag);
                break;
            }
            /*
             * Step the saved PC (uregs[15]) past the trapping instruction.
             * NOTE(review): 2 presumably matches a 16-bit Thumb bkpt; an
             * ARM-mode bkpt would need 4 - confirm for the target build.
             */
            regs.uregs[15] += 2;
            printf("\t%s\tp: new PC: %lx\n", tag, regs.uregs[15]);
            ptrace(PTRACE_SETREGS, recv, NULL, &regs);
            cont(recv, tag);
            break;
        }
        default:
            /*
             * Other event stops (clone, vfork, exec, ...) are not
             * breakpoints, so the PC must not be modified for them.
             */
            cont(recv, tag);
            break;
        }
    }
}
/*
 * Proof of concept: parent (A) and child (B) try to become each other's
 * tracer. B attaches to A first and pokes A's `can_runA` flag; A then
 * attaches to B and pokes B's `can_runB` flag.
 *
 * Returns 0 on normal completion, 1 if fork() fails.
 *
 * NOTE(review): the C-level fixes here do not remove the reported hang.
 * ptrace(2) states that a tracee stops on delivery of any signal, even an
 * ignored one. When A attaches to B, B stops and A (its parent/tracer)
 * is presumably sent SIGCHLD, which would put A into a signal stop that
 * only B - itself stopped - can resume. Confirm on the target kernel.
 */
int main() {
    /*
     * PTRACE_POKEDATA stores a whole word, so the flags the peer pokes are
     * word-sized; poking a 1-byte bool would overwrite adjacent memory.
     */
    volatile long can_runA = 0, can_runB = 0;
    pid_t procA = getpid();
    /* fork() already tells the parent the child's pid; no poke is needed. */
    pid_t procB = fork();

    if (procB < 0) {
        perror("err: ");
        return 1;
    }

    if (procB > 0) { // process A
        while (!can_runA) {
            printf("\tA\twaiting to continue...\n");
            sleep(1);
        }
        attachTo(procB, "A");
        waitpid(procB, NULL, __WALL);
        setOptions(procB, "A");
        /* setVarData() only forwards the address; the cast fits its signature. */
        setVarData(procB, (volatile bool*)&can_runB, (void*)1L, "A");
        cont(procB, "A");
        printf("\tA\tfinished\n");
    } else { // process B
        attachTo(procA, "B");
        waitpid(procA, NULL, __WALL);
        setOptions(procA, "B");
        setVarData(procA, (volatile bool*)&can_runA, (void*)1L, "B");
        cont(procA, "B");
        while (!can_runB) {
            printf("\tB\twaiting to continue...\n");
            sleep(1);
        }
        printf("\tB\tfinished\n");
    }
    return 0;
}
I have compiled and run this on an ARMv7 developer board with kernel version 3.0.35 (Linaro 13.08).
The output of the above code is this:
Code:
A waiting to continue...
B attachTo: 0
B setOptions: 0
B setVarData: 0
B setVarData: 0
B cont: 0
B waiting to continue...
B waiting to continue...
A attachTo: 0
As you can see, neither process ever reaches its "finished" printf; execution gets stuck as soon as the second process (A) attempts to attach to its own debugger (B).
I have done a similar experiment for 3 processes, such that each one attempts to attach to the other in a circular fashion: A -> B -> C -> A
The result in this case was exactly the same. However, here I was able to detect a race condition, because sometimes the code executed properly without getting stuck in a deadlock (but it's hard to reproduce).
If you wish, you can test this by using a lightweight debugger I've developed and three console terminals. Here's the code:
Code:
/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>
#include <signal.h>
#include <assert.h>
/* Function type returning void with an unspecified parameter list; not referenced anywhere in this listing. */
typedef void fun_moved_from_context();
//using namespace std;
/*
 * Attach to `pid` as its tracer using PTRACE_ATTACH and log the value
 * ptrace() returned.
 *
 * pid: process to attach to.
 * id:  numeric label of the calling process, printed in the log line.
 */
void attachTo(pid_t pid, int id) {
    const long rc = ptrace(PTRACE_ATTACH, pid, NULL, NULL);

    fprintf(stdout, "%i attachTo: %ld\n", id, rc);
}
/*
 * Issue PTRACE_SEIZE for `pid` and log the value ptrace() returned.
 * The result is only logged; it is not checked.
 *
 * pid: process to seize.
 * id:  numeric label of the calling process, printed in the log line.
 */
void seizeTo(pid_t pid, int id) {
    const long rc = ptrace(PTRACE_SEIZE, pid, NULL, NULL);

    fprintf(stdout, "%i seizeTo: %ld\n", id, rc);
}
/*
 * Issue PTRACE_DETACH for `pid` and log the value ptrace() returned.
 *
 * pid: tracee to detach from.
 * id:  numeric label of the calling process, printed in the log line.
 */
void detachFrom(pid_t pid, int id) {
    const long rc = ptrace(PTRACE_DETACH, pid, NULL, NULL);

    fprintf(stdout, "%i detachFrom: %ld\n", id, rc);
}
/*
 * Enable event reporting on tracee `pid` for fork, vfork, clone, exec and
 * exit via PTRACE_SETOPTIONS, then log the value ptrace() returned.
 *
 * pid: tracee whose options are set.
 * id:  numeric label of the calling process, printed in the log line.
 */
void setOptions(pid_t pid, int id) {
    const long wanted = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
                        PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                        PTRACE_O_TRACEEXIT;
    const long rc = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)wanted);

    printf("%i setOptions: %ld\n", id, rc);
}
/*
 * Issue PTRACE_POKEDATA on tracee `pid`, passing `can_run` as the address
 * argument and `data` as the value argument, then log ptrace()'s return.
 *
 * pid:     tracee whose memory is written.
 * can_run: address (interpreted in the tracee) that receives the value.
 * data:    value to store, carried in a pointer-typed argument.
 * id:      numeric label of the calling process, printed in the log line.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, int id) {
    void* target = (void*)can_run;
    const long rc = ptrace(PTRACE_POKEDATA, pid, target, data);

    printf("%i setVarData: %ld\n", id, rc);
}
/*
 * Resume tracee `pid` with PTRACE_CONT (NULL data argument) and log the
 * value ptrace() returned.
 *
 * pid: stopped tracee to resume.
 * id:  numeric label of the calling process, printed in the log line.
 */
void cont(pid_t pid, int id) {
    const long rc = ptrace(PTRACE_CONT, pid, NULL, NULL);

    fprintf(stdout, "%i cont: %ld\n", id, rc);
}
/*
 * Issue PTRACE_INTERRUPT for tracee `pid` and log the value ptrace()
 * returned.
 *
 * pid: tracee to interrupt.
 * id:  numeric label of the calling process, printed in the log line.
 */
void interrupt(pid_t pid, int id) {
    const long rc = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

    fprintf(stdout, "%i interrupt: %ld\n", id, rc);
}
/*
 * Tracer event loop: repeatedly wait() for a stop notification and react.
 *
 * id: numeric label of the calling process, printed in every log line and
 *     forwarded to cont().
 *
 * Handling per stop:
 *   - wait() fails (-1):            log and return.
 *   - not a stop (e.g. exit status): ignore and wait again.
 *   - stop signal other than SIGTRAP: resume the tracee with cont().
 *   - SIGTRAP with fork event:      log and resume.
 *   - SIGTRAP with exit event:      log and return.
 *   - plain SIGTRAP (no event):     advance the saved PC and resume.
 *   - any other ptrace event:       resume without touching registers.
 */
void debug(int id) {
    while (true) {
        int status;

        printf("%i while\n", id);
        sleep(1);

        pid_t recv = wait(&status);
        if (recv == -1) {
            printf("%i Debugger exiting\n", id);
            return;
        }
        if (!WIFSTOPPED(status)) {
            continue;
        }

        int stop_sig = WSTOPSIG(status);
        printf("%i signal: %i\n", id, stop_sig);
        if (stop_sig != SIGTRAP) {
            cont(recv, id);
            continue;
        }

        /*
         * For a ptrace event stop, status >> 8 equals
         * SIGTRAP | (event << 8); removing SIGTRAP leaves event << 8,
         * and 0 for a SIGTRAP that carries no event.
         */
        int event_code = (status >> 8) ^ SIGTRAP;
        printf("%i event_code: %i\n", id, event_code);
        switch (event_code) {
        case PTRACE_EVENT_FORK << 8:
            printf("%i FORK EVENT.\n", id);
            cont(recv, id);
            break;
        case PTRACE_EVENT_EXIT << 8:
            printf("%i %li exited.\n", id, (long)recv);
            return;
        case 0: {
            struct user_regs regs;

            printf("%i recv: %i ; status: %i\n", id, (int)recv, status);
            if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
                /* Never write back a register set that was not read. */
                perror("PTRACE_GETREGS");
                cont(recv, id);
                break;
            }
            /*
             * Step the saved PC (uregs[15]) past the trapping instruction.
             * NOTE(review): 2 presumably matches a 16-bit Thumb bkpt; an
             * ARM-mode bkpt would need 4 - confirm for the target build.
             */
            regs.uregs[15] += 2;
            printf("%i p: new PC: %lx\n", id, regs.uregs[15]);
            ptrace(PTRACE_SETREGS, recv, NULL, &regs);
            cont(recv, id);
            break;
        }
        default:
            /*
             * Other event stops (clone, vfork, exec, ...) are not
             * breakpoints, so the PC must not be modified for them.
             */
            cont(recv, id);
            break;
        }
    }
}
/*
 * Interactive entry point of the lightweight debugger.
 *
 * Prints its own pid, then reads a pid from stdin:
 *   - 0:        execute a `bkpt` instruction in this process.
 *   - non-zero: attach to that pid, wait for it to stop, set the trace
 *               options, resume it and enter the debug() event loop.
 *
 * Returns 0 on normal completion, 1 if no number could be read.
 */
int main() {
    int pid;
    int me = getpid();

    printf("Hello, I am %d\n", me);
    printf("pid:");
    /* The prompt has no newline, so flush it before blocking on input. */
    fflush(stdout);

    /* Without this check a failed read would leave `pid` uninitialized. */
    if (scanf("%d", &pid) != 1) {
        fprintf(stderr, "expected a numeric pid\n");
        return 1;
    }

    if (pid == 0) {
        printf("bkpt asm\n");
        asm("bkpt");
    } else {
        attachTo(pid, me);
        printf("start waitpid\n");
        waitpid(pid, NULL, __WALL);
        printf("end waitpid\n");
        setOptions(pid, me);
        cont(pid, me);
        debug(me);
    }
    return 0;
}
Once you've compiled the above code, you simply run the binary on each console and enter the PID of another process to establish a 3-way circle.
I am far from an expert on the kernel, but I did have a look at the ARM specific kernel implementation which left me puzzled. I couldn't find where/how/why this code does not work.
Now I'm wondering if it's possible at all to make this work without a deadlock occurring? Does anyone have any experience with this, or can provide some clues/feedback?
Thank you greatly for your time, attention and effort!
Ilya
LinuxQuestions.org is looking for people interested in writing
Editorials, Articles, Reviews, and more. If you'd like to contribute
content, let us know.