LinuxQuestions.org
Latest LQ Deal: Latest LQ Deals
Home Forums Tutorials Articles Register
Go Back   LinuxQuestions.org > Forums > Linux Forums > Linux - Software > Linux - Kernel
User Name
Password
Linux - Kernel This forum is for all discussion relating to the Linux kernel.

Notices


Reply
  Search this Thread
Old 06-01-2017, 01:28 PM   #1
rusilja
LQ Newbie
 
Registered: Jun 2017
Posts: 1

Rep: Reputation: Disabled
Circular debugging using ptrace results in deadlock due to race condition?


Hi guys,

As part of my personal research I am facing a challenging problem.
I am trying to let two processes be each other's debuggers using the ptrace syscall. However, my proof-of-concept implementation always results in a deadlock state (both processes get stuck in 't+' state as shown by 'ps aux').

Here is my code, it's pretty simple:
Code:
/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>

#include <signal.h>
#include <assert.h>

/* Function type returning void with an unspecified parameter list; not
 * referenced anywhere else in this listing. */
typedef void fun_moved_from_context();
// using namespace std;
/*
 * Attach to process `pid` as its tracer (PTRACE_ATTACH) and log the outcome.
 *
 * pid: process to attach to.
 * id:  label of the calling process, printed as a prefix of the log line.
 *
 * The raw ptrace return value is printed; when it is -1 the matching errno
 * text is additionally reported through perror().
 */
void attachTo(pid_t pid, char* id) {
  long ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL);
  int saved_errno = errno; /* printf() below is allowed to overwrite errno */

  printf("\t%s\tattachTo: %ld\n", id, ret);
  if (ret == -1) {
    errno = saved_errno;
    perror("err: ");
  }
}
/*
 * Attach to process `pid` with PTRACE_SEIZE and log the outcome.
 *
 * pid: process to seize.
 * id:  label of the calling process, printed as a prefix of the log line.
 *
 * ptrace() returns 0 on success for this request and -1 on failure, so the
 * result is logged and a failure is reported through perror() instead of
 * asserting on a strictly positive value (which could never hold).
 */
void seizeTo(pid_t pid, char* id) {
  long ret = ptrace(PTRACE_SEIZE, pid, NULL, NULL);
  int saved_errno = errno; /* printf() below is allowed to overwrite errno */

  printf("\t%s\tseizeTo: %ld\n", id, ret);
  if (ret == -1) {
    errno = saved_errno;
    perror("err: ");
  }
}
/*
 * Issue PTRACE_DETACH for `pid` and print the ptrace return value, prefixed
 * with the caller's label `id`.
 */
void detachFrom(pid_t pid, char* id) {
  const long status = ptrace(PTRACE_DETACH, pid, NULL, NULL);

  printf("\t%s\tdetachFrom: %ld\n", id, status);
}
/*
 * Enable clone/exec/exit/fork/vfork event reporting on tracee `pid` via
 * PTRACE_SETOPTIONS and print the ptrace return value, prefixed with `id`.
 */
void setOptions(pid_t pid, char* id) {
  const long traced_events = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
                             PTRACE_O_TRACEEXIT | PTRACE_O_TRACEFORK |
                             PTRACE_O_TRACEVFORK;
  const long status =
      ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)traced_events);

  printf("\t%s\tsetOptions: %ld\n", id, status);
}
/*
 * Write `data` into tracee `pid` at address `can_run` with PTRACE_POKEDATA
 * and print the ptrace return value, prefixed with `id`.
 *
 * `data` is passed by value as the word to store, not as a pointer to it.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, char* id) {
  void* target_addr = (void*)can_run;
  const long status = ptrace(PTRACE_POKEDATA, pid, target_addr, data);

  printf("\t%s\tsetVarData: %ld\n", id, status);
}
/*
 * Resume stopped tracee `pid` with PTRACE_CONT (NULL data argument) and print
 * the ptrace return value, prefixed with `id`.
 */
void cont(pid_t pid, char* id) {
  const long status = ptrace(PTRACE_CONT, pid, NULL, NULL);

  printf("\t%s\tcont: %ld\n", id, status);
}
/*
 * Issue PTRACE_INTERRUPT for tracee `pid` and print the ptrace return value,
 * prefixed with `id`.
 */
void interrupt(pid_t pid, char* id) {
  const long status = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

  printf("\t%s\tinterrupt: %ld\n", id, status);
}
/*
 * Tracer event loop: repeatedly wait()s for a stop of any child/tracee and
 * reacts to it until wait() fails or an exit event is seen.
 *
 * id: numeric label of the calling process; it is rendered to text once
 *     because the log format and cont() in this file take a string label.
 *
 * Handling per stop:
 *   - stop signal other than SIGTRAP: the tracee is simply resumed;
 *   - SIGTRAP + PTRACE_EVENT_FORK:    logged, tracee resumed;
 *   - SIGTRAP + PTRACE_EVENT_EXIT:    logged, the loop returns;
 *   - any other SIGTRAP:              the tracee's uregs[15] is advanced by 2
 *                                     and the tracee is resumed.
 */
void debug(int id) {
  char tag[16];
  snprintf(tag, sizeof tag, "%d", id);

  while (true) {
    int status;

    printf("\t%s\twhile\n", tag);
    sleep(1);

    pid_t recv = wait(&status);
    if (recv == -1) {
      printf("\t%s\tDebugger exiting\n", tag);
      return;
    }
    if (!WIFSTOPPED(status)) {
      continue;
    }

    int stop_sig = WSTOPSIG(status);
    if (stop_sig != SIGTRAP) {
      /* Resume the tracee so it is not left stopped forever; PTRACE_CONT is
       * issued with NULL data, so the signal is not re-injected. */
      cont(recv, tag);
      continue;
    }

    /* For ptrace event stops, status >> 8 is SIGTRAP | (event << 8);
     * removing the SIGTRAP bits leaves event << 8. */
    int event_code = (status >> 8) ^ SIGTRAP;
    switch (event_code) {
      case PTRACE_EVENT_FORK << 8:
        printf("\t%s\tFORK EVENT.\n", tag);
        cont(recv, tag);
        break;
      case PTRACE_EVENT_EXIT << 8:
        printf("\t%s\t%li exited.\n", tag, (long)recv);
        return;
      default: {
        struct user_regs regs;

        printf("\t%s\trecv: %i ; status: %i\n", tag, recv, status);
        if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
          /* Never write back registers that were not read successfully. */
          perror("PTRACE_GETREGS");
        } else {
          /* uregs[15] is logged as the PC; presumably the +2 steps over a
           * 2-byte breakpoint instruction -- TODO confirm. */
          regs.uregs[15] += 2;
          printf("\t%s\tp: new PC: %lx\n", tag, regs.uregs[15]);
          ptrace(PTRACE_SETREGS, recv, NULL, &regs);
        }
        cont(recv, tag);
        break;
      }
    }
  }
}

/*
 * Proof of concept: parent (A) and child (B) try to become each other's
 * tracer.
 *
 * B attaches to A first, pokes A's `can_runA` flag and B's own pid into A's
 * memory and resumes A.  A then attaches to B using the poked pid, pokes B's
 * `can_runB` flag and resumes B.  Both print "finished" when done.
 *
 * NOTE(review): once both sides trace each other, a stop of one process
 * raises SIGCHLD in the other, which is itself traced and therefore
 * presumably enters a signal-delivery-stop that only its (already stopped)
 * peer could resume -- a likely cause of the hang; confirm against ptrace(2).
 */
int main() {
  /*
   * PTRACE_POKEDATA stores a whole machine word, so every object the peer
   * pokes is a full `long`; a narrower object would have the bytes next to it
   * on the stack overwritten by the remainder of the word.
   */
  volatile long can_runA = 0, can_runB = 0;
  volatile long procB = 0; /* filled in inside A by B's poke */
  pid_t procA = getpid();

  pid_t child = fork();
  if (child == -1) {
    /* Without this check a failed fork would run the "process B" branch
     * inside the only existing process. */
    perror("fork");
    return 1;
  }

  if (child > 0) {  // process A
    while (!can_runA) {
      printf("\tA\twaiting to continue...\n");
      sleep(1);
    }
    attachTo((pid_t)procB, "A");
    waitpid((pid_t)procB, NULL, __WALL);
    setOptions((pid_t)procB, "A");
    setVarData((pid_t)procB, (volatile bool*)&can_runB, (void*)1L, "A");
    cont((pid_t)procB, "A");
    printf("\tA\tfinished\n");
  } else {  // process B
    procB = getpid();
    attachTo(procA, "B");
    waitpid(procA, NULL, __WALL);
    setOptions(procA, "B");
    setVarData(procA, (volatile bool*)&can_runA, (void*)1L, "B");
    setVarData(procA, (volatile bool*)&procB, (void*)procB, "B");
    cont(procA, "B");
    while (!can_runB) {
      printf("\tB\twaiting to continue...\n");
      sleep(1);
    }
    printf("\tB\tfinished\n");
  }
  return 0;
}
I have compiled and run this on an ARMv7 developer board with kernel version 3.0.35 (Linaro 13.08).

The output of the above code is this:

Code:
  A    waiting to continue... 
  B    attachTo: 0 
  B    setOptions: 0 
  B    setVarData: 0 
  B    setVarData: 0 
  B    cont: 0 
  B    waiting to continue... 
  B    waiting to continue... 
  A    attachTo: 0
As you can see, neither process ever reaches its "finished" printf; both get stuck as soon as process A (the debuggee) attaches back to process B (its debugger).

I have done a similar experiment for 3 processes, such that each one attempts to attach to the other in a circular fashion: A -> B -> C -> A
The result in this case was exactly the same. However, here I was able to detect a race condition, because sometimes the code executed properly without getting stuck in a deadlock (but it's hard to reproduce).

If you wish, you can test this by using a lightweight debugger I've developed and three console terminals. Here's the code:
Code:
/* C standard headers */
#include <errno.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Linux headers */
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/user.h>
/* Architecture-specific headers */
#include <asm/ptrace.h>
#include <asm/unistd.h>

#include <signal.h>
#include <assert.h>

/* Function type returning void with an unspecified parameter list; not
 * referenced anywhere else in this listing. */
typedef void fun_moved_from_context();
//using namespace std;
/*
 * Attach to process `pid` as its tracer (PTRACE_ATTACH) and print the ptrace
 * return value, prefixed with the caller's numeric label `id`.
 */
void attachTo(pid_t pid, int id) {
    const long status = ptrace(PTRACE_ATTACH, pid, NULL, NULL);

    printf("%i  attachTo: %ld\n", id, status);
}
/*
 * Attach to process `pid` with PTRACE_SEIZE and print the ptrace return
 * value, prefixed with the caller's numeric label `id`.
 */
void seizeTo(pid_t pid, int id) {
    const long status = ptrace(PTRACE_SEIZE, pid, NULL, NULL);

    printf("%i  seizeTo: %ld\n", id, status);
}
/*
 * Issue PTRACE_DETACH for `pid` and print the ptrace return value, prefixed
 * with the caller's numeric label `id`.
 */
void detachFrom(pid_t pid, int id) {
    const long status = ptrace(PTRACE_DETACH, pid, NULL, NULL);

    printf("%i  detachFrom: %ld\n", id, status);
}
/*
 * Enable clone/exec/exit/fork/vfork event reporting on tracee `pid` via
 * PTRACE_SETOPTIONS and print the ptrace return value, prefixed with `id`.
 */
void setOptions(pid_t pid, int id) {
    const long traced_events = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC
                             | PTRACE_O_TRACEEXIT | PTRACE_O_TRACEFORK
                             | PTRACE_O_TRACEVFORK;
    const long status = ptrace(PTRACE_SETOPTIONS, pid, NULL, (void*)traced_events);

    printf("%i  setOptions: %ld\n", id, status);
}
/*
 * Write `data` into tracee `pid` at address `can_run` with PTRACE_POKEDATA
 * and print the ptrace return value, prefixed with `id`.
 *
 * `data` is passed by value as the word to store, not as a pointer to it.
 */
void setVarData(pid_t pid, volatile bool* can_run, void* data, int id) {
    void* target_addr = (void*)can_run;
    const long status = ptrace(PTRACE_POKEDATA, pid, target_addr, data);

    printf("%i  setVarData: %ld\n", id, status);
}
/*
 * Resume stopped tracee `pid` with PTRACE_CONT (NULL data argument) and print
 * the ptrace return value, prefixed with `id`.
 */
void cont(pid_t pid, int id) {
    const long status = ptrace(PTRACE_CONT, pid, NULL, NULL);

    printf("%i  cont: %ld\n", id, status);
}
/*
 * Issue PTRACE_INTERRUPT for tracee `pid` and print the ptrace return value,
 * prefixed with `id`.
 */
void interrupt(pid_t pid, int id) {
    const long status = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);

    printf("%i  interrupt: %ld\n", id, status);
}
/*
 * Tracer event loop: repeatedly wait()s for a stop of any child/tracee and
 * reacts to it until wait() fails or an exit event is seen.
 *
 * id: numeric label of the calling process, used as log prefix and passed on
 *     to cont().
 *
 * Handling per stop:
 *   - stop signal other than SIGTRAP: the tracee is simply resumed;
 *   - SIGTRAP + PTRACE_EVENT_FORK:    logged, tracee resumed;
 *   - SIGTRAP + PTRACE_EVENT_EXIT:    logged, the loop returns;
 *   - any other SIGTRAP:              the tracee's uregs[15] is advanced by 2
 *                                     and the tracee is resumed.
 */
void debug(int id) {
    while (true) {
        int status;

        printf("%i  while\n", id);
        sleep(1);

        pid_t recv = wait(&status);
        if (recv == -1) {
            printf("%i  Debugger exiting\n", id);
            return;
        }
        if (!WIFSTOPPED(status)) {
            continue;
        }

        int stop_sig = WSTOPSIG(status);
        printf("%i  signal: %i\n", id, stop_sig);
        if (stop_sig != SIGTRAP) {
            /* PTRACE_CONT is issued with NULL data, so the signal is not
             * re-injected into the tracee. */
            cont(recv, id);
            continue;
        }

        /* For ptrace event stops, status >> 8 is SIGTRAP | (event << 8);
         * removing the SIGTRAP bits leaves event << 8. */
        int event_code = (status >> 8) ^ SIGTRAP;
        printf("%i  event_code: %i\n", id, event_code);
        switch (event_code) {
            case PTRACE_EVENT_FORK << 8:
                printf("%i  FORK EVENT.\n", id);
                cont(recv, id);
                break;
            case PTRACE_EVENT_EXIT << 8:
                printf("%i  %li exited.\n", id, (long)recv);
                return;
            default: {
                struct user_regs regs;

                printf("%i  recv: %i ; status: %i\n", id, recv, status);
                if (ptrace(PTRACE_GETREGS, recv, NULL, &regs) == -1) {
                    /* Never write back registers that were not read. */
                    perror("PTRACE_GETREGS");
                } else {
                    /* uregs[15] is logged as the PC; presumably the +2 steps
                     * over a 2-byte breakpoint instruction -- TODO confirm. */
                    regs.uregs[15] += 2;
                    printf("%i  p: new PC: %lx\n", id, regs.uregs[15]);
                    ptrace(PTRACE_SETREGS, recv, NULL, &regs);
                }
                cont(recv, id);
                break;
            }
        }
    }
}

/*
 * Interactive mini-debugger.
 *
 * Prints its own pid, then reads a pid from stdin:
 *   - 0:        executes a `bkpt` instruction in this process;
 *   - non-zero: attaches to that process, waits for it to stop, enables the
 *               event options, resumes it and enters the debug() loop.
 *
 * Returns 0 normally and 1 when no pid could be parsed from the input.
 */
int main() {
    int pid;
    int me = getpid();
    printf("Hello, I am %d\n", me);
    printf("pid:");
    fflush(stdout); /* the prompt has no newline; make sure it is shown */
    if (scanf("%d", &pid) != 1) {
        /* Without a successful conversion `pid` would be read uninitialised. */
        fprintf(stderr, "expected a numeric pid\n");
        return 1;
    }
    if (pid == 0) {
        printf("bkpt asm\n");
        /* __asm__ is accepted in strict ISO modes as well, unlike `asm`. */
        __asm__ volatile("bkpt");
    } else {
        attachTo(pid, me);
        printf("start waitpid\n");
        waitpid(pid, NULL, __WALL);
        printf("end waitpid\n");
        setOptions(pid, me);
        cont(pid, me);
        debug(me);
    }
    return 0;
}
Once you've compiled the above code, you simply run the binary on each console and enter the PID of another process to establish a 3-way circle.


I am far from an expert on the kernel, but I did have a look at the ARM-specific kernel implementation, which left me puzzled. I couldn't work out where, how, or why this code fails.
Now I'm wondering if it's possible at all to make this work without a deadlock occurring? Does anyone have any experience with this, or can provide some clues/feedback?

Thank you greatly for your time, attention and effort!
Ilya
 
  


Reply



Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is Off
HTML code is Off



Similar Threads
Thread Thread Starter Forum Replies Last Post
Touchscreen race condition silvanm Linux - Laptop and Netbook 10 01-21-2017 04:14 PM
race condition in gethostbyname_r dcds Linux - Server 7 02-16-2015 03:39 AM
new OpenSSL use-after-free race condition BenCollver Slackware 3 04-15-2014 11:42 AM
What is race condition? LinuxInfo Programming 1 09-15-2008 09:44 PM
Kded race condition during automounting SpelledJ Slackware 6 10-25-2007 01:51 PM

LinuxQuestions.org > Forums > Linux Forums > Linux - Software > Linux - Kernel

All times are GMT -5. The time now is 06:46 PM.

Main Menu
Advertisement
My LQ
Write for LQ
LinuxQuestions.org is looking for people interested in writing Editorials, Articles, Reviews, and more. If you'd like to contribute content, let us know.
Main Menu
Syndicate
RSS1  Latest Threads
RSS1  LQ News
Twitter: @linuxquestions
Open Source Consulting | Domain Registration