Linux Applications Debugging Techniques/Resource leaks

From Wikibooks, open books for an open world
Jump to navigation Jump to search

Zombie threads[edit | edit source]

Any thread that has terminated but has not been joined or detached will leak OS resources until the process terminates. Unfortunately, neither /proc nor gdb will show you these zombie threads, at least not on some kernels.

One way to find where such threads are created is with a gdb canned command that traces every thread-creation call:

#
# trace_call <location>
#   Sets a breakpoint at <location> whose command list prints a full
#   backtrace (including local variables) each time it is hit and then
#   resumes the program, so every call is logged without stopping.
#
define trace_call
    b $arg0
    commands
    bt full
    continue
    end
end
document trace_call
Trace specified call with call stack to screen. Example:
    set breakpoint pending on
    set pagination off
    set logging on
    trace_call __pthread_create_2_1
end
Using host libthread_db library "/lib/i686/cmov/libthread_db.so.1".
(gdb) trace_call __pthread_create_2_1
Function "__pthread_create_2_1" not defined.
Breakpoint 1 (__pthread_create_2_1) pending.
(gdb) trace_call __pthread_create_2_0
Function "__pthread_create_2_0" not defined.
Breakpoint 2 (__pthread_create_2_0) pending.
(gdb) r
Starting program: /home/amelinte/projects/articole/wikibooks/debug/plock foo bar bax
[Thread debugging using libthread_db enabled]
Breakpoint 3 at 0xb7f9b746
Pending breakpoint "__pthread_create_2_1" resolved
Breakpoint 4 at 0xb7f9c395
Pending breakpoint "__pthread_create_2_0" resolved
[New Thread 0xb7e48ad0 (LWP 8635)]
[Switching to Thread 0xb7e48ad0 (LWP 8635)]

Breakpoint 3, 0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
#0  0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
No symbol table info available.
#1  0x08048a7f in main (argc=4, argv=0xbfceb714) at plock.c:97
        s = 0
        tnum = 0
        opt = -1
        num_threads = 3
        tinfo = (struct thread_info *) 0x833b008
        attr = {__size = '\0' <repeats 13 times>, "\020", '\0' <repeats 21 times>, __align = 0}
        stack_size = -1
        res = (void *) 0x0
[New Thread 0xb7e47b90 (LWP 8638)]
Thread 1: top of stack near 0xb7e473c8; argv_string=foo

Another way is to use (again) an interposition library:

/*
 *  Hook library. Usage: 
 *    gcc -c -g -Wall -fPIC libhook.c -o libhook.o 
 *    ld -o libhook.so libhook.o -shared -ldl
 *    LD_PRELOAD=./libhook.so program arguments
 * 
 *  Copyright 2012 Aurelian Melinte. 
 *  Released under GPL 3.0 or later. 
 */

#define _GNU_SOURCE
#include <dlfcn.h>

#include <signal.h>
#include <execinfo.h>

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>  /*printf*/
#include <unistd.h>

#include <pthread.h>

#include <assert.h>



/*
 * Pointer to a function taking a pthread_mutex_t * and returning int.
 * Not referenced anywhere else in this file.
 */
typedef int (*lp_pthread_mutex_func)(pthread_mutex_t *mutex);
/*
 * Pointer to a function with the same signature as the pthread_create()
 * wrapper defined in this file.
 */
typedef int (*pthread_create_func)(pthread_t *thread, 
                                   const pthread_attr_t *attr,
								   void *(*start_routine) (void *), void *arg);
/*
 * Address of the next pthread_create in the lookup order, filled in by
 * hook_one() through dlsym(RTLD_NEXT, ...). NULL until resolved.
 */
static pthread_create_func  _pthread_create_hook = NULL;


/*
 * Resolve the next definition of `fname` in the library search order
 * and store its address in *fptr.
 *
 *   fptr   in/out: slot receiving the resolved address; if it already
 *          holds a non-NULL value nothing is looked up.
 *   fname  name of the symbol to look up (must not be NULL).
 *
 * Returns 0 when *fptr is (or already was) resolved, -1 when the lookup
 * failed; on failure *fptr is left NULL. Progress and errors are
 * reported on stdout.
 */
static int
hook_one(pthread_create_func *fptr, const char *fname)
{
    const char *msg = NULL;
    void *sym = NULL;

    assert(fptr != NULL);
    assert(fname != NULL);

    if (*fptr != NULL) {
        return 0;   /* already hooked */
    }

    printf("dlsym : wrapping %s\n", fname);

    /*
     * Clear any error left over from an earlier dl*() call so that the
     * dlerror() below reports on this dlsym() only.
     */
    (void) dlerror();
    sym = dlsym(RTLD_NEXT, fname);
    /* Always fetch the message: it must never reach printf as NULL. */
    msg = dlerror();

    printf("next_%s = %p\n", fname, sym);
    if (sym == NULL || msg != NULL) {
        printf("dlsym %s failed : %s\n", fname,
               (msg != NULL) ? msg : "symbol not found");
        return -1;
    }

    /* Object-to-function pointer conversion, as in the dlsym() idiom. */
    *fptr = sym;
    printf("dlsym: wrapping %s done\n", fname);
    return 0;
}


/*
 * Make sure the real pthread_create has been resolved. Does nothing if
 * it already is; terminates the process with EXIT_FAILURE if the
 * lookup fails.
 */
static void
hook_funcs(void)
{
    int status;

    if (_pthread_create_hook != NULL)
        return;     /* resolved on an earlier call */

    status = hook_one(&_pthread_create_hook, "pthread_create");
    if (status == 0 && _pthread_create_hook != NULL)
        return;

    printf("Failed to hook.\n");
    exit(EXIT_FAILURE);
}


/*
 *
 */
 

int 
pthread_create(pthread_t *thread, 
               const pthread_attr_t *attr,
               void *(*start_routine) (void *), void *arg)
{
#define SIZE 40
    void *buffer[SIZE] = {0};
	int nptrs = 0;

    int rc = EINVAL; 
	
	rc = _pthread_create_hook(thread, attr, start_routine, arg);

    printf("*** pthread_create:\n");
    nptrs = backtrace(buffer, SIZE);
    backtrace_symbols_fd(buffer, nptrs, STDOUT_FILENO);

    return rc; 
}

/*
 *
 */
 
/*
 * Constructor: prints a marker on stdout and resolves the wrapped
 * function through hook_funcs().
 */
__attribute__((constructor)) void
_init(void)
{
    printf("*** _init().\n");
    hook_funcs();
}


/*
 * Destructor: prints a marker on stdout. There is nothing to release.
 */
__attribute__((destructor)) void
_fini(void)
{
    printf("*** _fini().\n");
}

The output is a bit rough but it can be refined down to file and line by replacing backtrace_symbols_fd() with appropriate code:

*** pthread_create:
./libhook.so(pthread_create+0x8c)[0x400215d3]
./plock[0x8048a7f]
/lib/i686/cmov/libc.so.6(__libc_start_main+0xe0)[0x4006f450]
./plock[0x8048791]

File descriptors[edit | edit source]

As just about anything is a file (folders, sockets, pipes, etc.), just about anything can result in a file descriptor that needs to be closed. /proc can help:

# tree /proc/26041
/proc/26041
...
|-- fd                  # Open file descriptors
|   |-- 0 -> /dev/pts/21
|   |-- 1 -> /dev/pts/21
|   |-- 2 -> /dev/pts/21
|   `-- 3 -> socket:[113497835]
|-- fdinfo
|   |-- 0
|   |-- 1
|   |-- 2
|   `-- 3
...

The trace_call command for gdb can help with the call stack.

If gdb is not available on the machine, an interposition library hooking open(), pipe(), socket(), etc. can be built.

Other tools that can be used:

  • lsof
  • fuser

Ports[edit | edit source]

Which process is using a port? As root:

# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address               Foreign Address             State       PID/Program name   
tcp        0      0 0.0.0.0:36510               0.0.0.0:*                   LISTEN      -                   
tcp        0      0 127.0.0.1:2207              0.0.0.0:*                   LISTEN      3438/python         
...
# lsof
COMMAND     PID             USER   FD      TYPE             DEVICE       SIZE       NODE NAME
init          1             root  cwd       DIR              253,0       4096          2 /
...
python     3438             root    4u     IPv4              11416                   TCP localhost.localdomain:2207 (LISTEN)

# lsof -i :2207
COMMAND  PID USER   FD   TYPE DEVICE SIZE NODE NAME
python  3438 root    4u  IPv4  11416       TCP localhost.localdomain:2207 (LISTEN)

Other tools:

  • fuser

IPC[edit | edit source]

For System V semaphores, shared memory segments and message queues, use:

  • ipcs
  • ipcrm
# ipcs -spt
------ Semaphore Operation/Change Times --------
semid    owner      last-op                    last-changed              
187826177 aurelian_m  Fri Feb 10 09:37:26 2012   Fri Feb 10 09:33:39 2012  
187858946 aurelian_m  Fri Feb 10 09:52:11 2012   Fri Feb 10 09:50:44 2012

DIY: an interposition resource counter[edit | edit source]

libmemleak can be easily modified to keep track of whatever resources are leaking. Hook the right API (e.g. open()/close()).