High Performance Linux



> Try Tempesta FW, a high performance open source application delivery controller for the Linux/x86-64 platform.

> Or check custom high-performance solutions from Tempesta Technologies, INC.

> Careers: if you love low-level C/C++ hacking and Linux, we'll be happy to hear from you.


Tuesday, October 18, 2011

How to debug memory corruptions on old Solaris 8

There is no problem coping with memory corruption on modern UNIX thanks to such great tools as valgrind. But it can be a hard task if you are running an old OS like Solaris 8. In that case you can use the following concept (a proof-of-concept program is below): just remove write permission from the pages which hold the data being corrupted, and get SIGSEGV on the actual write to the wrong data. This way we fail exactly at the first occurrence of the data corruption instead of failing later, when the wrong data is used.

Please keep in mind that mprotect() operates only with page granularity, so the page which holds the data of interest may also hold other data, and you can get a segmentation fault on a legitimate write to that other data instead of on the corruption you are looking for. In such cases we recommend allocating the debugged data in its own individual page(s), which are then protected.

Also pay attention to the pstack trick, which can be very useful in a number of other cases.

Here is an example of how to do the trick, with output samples in the top comment:

/*
 * Debug memory corruptions with mprotect(). Could be useful on old UNIX.
 *
 * Compile on Solaris 8 with:
 *  $ CC -g -o mprotect mprotect.c
 *
 * Linux Backtrace output:
 *  write (corrupt) data by 0x607124
 *  segmentation fault at 0x607124
 *  ./mprotect [0x400997]   // signal handler
 *  /lib/libc.so.6 [0x7fcfe99f33a0]
 *  ./mprotect [0x400a5d]   // the culprit!
 *  ./mprotect [0x400b88]
 *  /lib/libc.so.6(__libc_start_main+0xe6) [0x7fcfe99dfa26]
 *  ./mprotect [0x400879]
 *
 * Solaris Backtrace output:
 *  write (corrupt) data by 808650c
 *  segmentation fault at 808650c
 *  3866:   ./mprotect
 *   d0141e75 read     (4, 80894b4, 1400)
 *   d010c25c _filbuf  (8061858, b, d0072a00, d0110846) + d3
 *   d011091c fread    (8046580, 1000, 1, 8061858, 0, 0) + e4
 *   0805113e sigsegv_handler (b, 8047898, 8047698) + 9e
 *   d013d0cf __sighndlr (b, 8047898, 8047698, 80510a0) + f
 *   d01301bf call_user_handler (b) + 2af
 *   d01303ef sigacthandler (b, 8047898, 8047698) + df
 *   --- called from signal handler with signal 11 (SIGSEGV) ---
 *   080511c0 __1cQmemory_corruptor6F_v_ (8047980, d03fc7b4, a, 8061828, d01c0000, 804797c) + 50
 *             ^^^^^^^^^^^^^^^^ the culprit!
 *   080513cf main     (1, 80479c4, 80479cc, 8050f80) + 1ff
 *   0805100d _start   (1, 8047ae0, 0, 8047aeb, 8047b28, 8047b4c) + 7d
 *
 * GDB core dump backtrace (Linux):
 *  Program terminated with signal 11, Segmentation fault.
 *  #0  0x0000000000400a0d in memory_corruptor () at mprotect.c:111
 *  68                      data[PAGE_SIZE * 5 + i] = 0x12;
 */
// #include <execinfo.h> // Only applicable for modern UNIX
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

/* System page size; note that every use expands to a getpagesize() call. */
#define PAGE_SIZE   getpagesize()
/* Mask clearing the in-page offset bits (assumes a power-of-two page size). */
#define PAGE_MASK   (~(PAGE_SIZE - 1))
/* Size of the test buffer: eight pages. */
#define SIZE        (PAGE_SIZE * 8)

/* Test buffer: allocated in main(), written to by memory_corruptor(). */
char *data;

extern "C" {
/*
 * SIGSEGV handler: reports the faulting address and dumps the call stack
 * of the current process, then terminates the process.
 *
 * @sig   signal number (unused);
 * @si    signal details, si->si_addr is the address of the faulting access;
 * @data  signal context (unused).
 *
 * Never returns: the process exits with status 1 (or 2 if the optional
 * backtrace() code below is enabled and fails).
 *
 * NOTE: stdio and popen() are not async-signal-safe. This is tolerable
 * only because this is a last-gasp debugging aid which never returns to
 * the interrupted code.
 */
void
sigsegv_handler(int sig, siginfo_t *si, void *data)
{
    int ret = 1;
    char cmd[64], stack[4096];
    size_t n_read;
    FILE *f;

    (void)sig;
    (void)data;

    printf("segmentation fault at %p\n", si->si_addr);
    fflush(NULL);

    /*
     * The ugly way to print stack on old Solaris.
     */
    snprintf(cmd, sizeof(cmd), "pstack %ld", (long)getpid());
    f = popen(cmd, "r");
    if (!f) {
        perror("popen");
        goto out;
    }
    /*
     * Copy pstack output verbatim: it is raw data, so it must never be
     * used as a printf() format string, and it is not NUL-terminated.
     * Reading in a loop also avoids truncating long stack dumps.
     */
    while ((n_read = fread(stack, 1, sizeof(stack), f)) > 0)
        fwrite(stack, 1, n_read, stdout);
    pclose(f);

    /*
     * The better way to do it on modern UNIX (Solaris, Linux, FreeBSD).
     * Kept in its own scope so that "goto out" above does not jump over
     * initialized declarations.
     */
#if 0
    {
        int i;
        char **btrace;
        void *trace_addrs[200];
        int n_addr = backtrace(trace_addrs,
                sizeof(trace_addrs) / sizeof(trace_addrs[0]));
        if (!n_addr || n_addr == 200) {
            perror("backtrace");
            ret = 2;
            goto out;
        }
        btrace = backtrace_symbols(trace_addrs, n_addr);
        if (!btrace) {
            perror("backtrace_symbols");
            ret = 2;
            goto out;
        }

        for (i = 0; i < n_addr; ++i)
            printf("%s\n", btrace[i]);
        free(btrace);
    }
#endif
out:
    fflush(NULL);
    _exit(ret);
}
}

/*
 * Simulates a stray write: prints the address of the first byte it is
 * about to touch, then stores 0x12 into the 50 bytes located at offsets
 * PAGE_SIZE * 5 + 100 .. PAGE_SIZE * 5 + 149 of the global data buffer.
 */
void
memory_corruptor(void)
{
    char *victim = data + PAGE_SIZE * 5 + 100;
    char *const stop = victim + 50;

    printf("write (corrupt) data by %p\n", victim);
    while (victim < stop)
        *victim++ = 0x12;
}

/*
 * Proof of concept: installs sigsegv_handler() for SIGSEGV, allocates and
 * fills an 8-page buffer, lets memory_corruptor() write to it once while
 * it is still writable, then write-protects 4 pages of it with mprotect()
 * and runs memory_corruptor() again, so the second run faults right at
 * the offending store.
 *
 * Command line arguments are not used. Exits with 1 if the handler cannot
 * be installed, the buffer cannot be allocated or mprotect() fails.
 */
int
main(int argc, char *argv[])
{
    int i;
    /* Evaluate getpagesize() once instead of on every loop iteration. */
    const int page_size = PAGE_SIZE;
    const int size = page_size * 8;
    char *protect_start;
    struct sigaction sa;

    (void)argc;
    (void)argv;

    sigemptyset(&sa.sa_mask);
    sigaddset(&sa.sa_mask, SIGSEGV);
    sa.sa_flags = SA_SIGINFO;
    sa.sa_sigaction = sigsegv_handler;
    if (sigaction(SIGSEGV, &sa, NULL)) {
        perror("sigaction");
        exit(1);
    }

    data = (char*)malloc(size);
    if (!data) {
        perror("malloc");
        exit(1);
    }
    for (i = 0; i < size; ++i)
        data[i] = 0x0a;

    printf("Mapped (%p). Press any key...\n", (void*)data);
    fflush(NULL);
    getchar();

    /* That's ok to write into memory without protection. */
    memory_corruptor();

    /*
     * Set memory protection to catch memory writes.
     * Usually only PROT_READ should be set.
     *
     * malloc() does not return page-aligned memory while mprotect()
     * requires a page-aligned address, so round the start address down
     * to the beginning of the page holding data + 4 pages.
     */
    protect_start = (char*)((long)(data + page_size * 4) & PAGE_MASK);
    if (mprotect(protect_start, page_size * 4, PROT_READ|PROT_EXEC))
    {
        perror("mprotect");
        exit(1);
    }
    printf("Protected (%p). Press any key...\n", (void*)protect_start);
    fflush(NULL);
    getchar();

    memory_corruptor();

    return 0;
}

Wednesday, October 5, 2011

Speaking at HighLoad 2011

Yesterday I spoke at HighLoad.

In the presentation (in Russian) I concentrated on atomic operations, lock-free data structures, Linux zero-copy network IO and CPU binding. We have gained great experience implementing this cool stuff in our current project (high performance clustering software to process Cisco RDRv1 traffic) for Video International, and I was pleased to share the basic principles of developing high performance Linux server software.