hpc_debug
hpc_debug
Victor Eijkhout
2022
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
1 Analysis basics
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
2 Compiler options
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
3 Optimization basics
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
5 Multicore / multithread
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
6 Multinode performance
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Logic errors:
functions behave differently from how you thought,
or interact in ways you didn’t envision
Hard to debug
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Coding errors:
send without receive
forget to allocate buffer
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Defensive programming
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
9 Defensive programming
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
10 Memory management
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
11 Modular design
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
13
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
tutorials/gdb/c/hello.c
#include <stdlib.h>
#include <stdio.h>
/* Minimal example program: prints a single line and exits with status 0. */
int main() {
printf("hello world\n");
return 0;
}
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
16 Source listing
%% cc -o hello hello.c
%% gdb hello
GNU gdb 6.3.50-20050815 # ..... version info
(gdb) list
Eijkhout: programming
Defensive programming
Debugging
tutorials/gdb/c/say.c
#include <stdlib.h>
#include <stdio.h>
int main(int argc,char **argv) {
int i;
for (i=0; i<atoi(argv[1]); i++)
printf("hello world\n");
return 0;
}
%% gdb say
.... the usual messages ...
(gdb) run 2
Starting program: /home/eijkhout/tutorials/gdb/c/say 2
Reading symbols for shared libraries +. done
hello world
hello world
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
18 Memory problems 1
// square.c
// Fragment: reads a count nmax from stdin, then accumulates 1/(i*i) for i=1..nmax into sum.
int nmax,i;
float *squares,sum;  // in this fragment squares is never given storage and sum is never initialized
fscanf(stdin,"%d",nmax);  // passes the value of nmax where %d requires an int* (&nmax); the backtrace shown for this run ends inside fscanf
for (i=1; i<=nmax; i++) {
squares[i] = 1./(i*i); sum += squares[i];
}
printf("Sum: %e\n",sum);
%% cc -g -o square square.c
%% ./square
5000
Segmentation fault
19 Stack trace
(gdb) backtrace
#0 0x00007fff824295ca in __svfscanf_l ()
#1 0x00007fff8244011b in fscanf ()
#2 0x0000000100000e89 in main (argc=1, argv=0x7fff5fbfc7c0) at square.c:7
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
21 Out-of-bounds errors
// up.c
// Fragment: fills array[0..nlocal-1] with 1/(i*i), then adds the entries into s.
int nlocal = 100,i;
double s, *array = (double*) malloc(nlocal*sizeof(double));
for (i=0; i<nlocal; i++) {
double di = (double)i;
array[i] = 1/(di*di);  // for i==0 this divides by 0.0
}
s = 0.;
// The loop starts at the last valid index but increments i, and the i>=0
// test does not bound it from above, so array[i] is read beyond the nlocal
// allocated elements.
for (i=nlocal-1; i>=0; i++) {
double di = (double)i;  // di is not used in this loop
s += array[i];
}
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
23 Breakpoints
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
24 Stepping
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Memory debugging
Eijkhout: programming
Defensive programming
Debugging
25 Program with problems
Memory debugging
Parallel Debugging
tutorials/gdb/c/square1.c
#include <stdlib.h>
#include <stdio.h>
//codesnippet gdbsquare1c
int main(int argc,char **argv) {  // reads nmax from stdin and prints the accumulated 1/(i*i) for i=1..nmax
int nmax,i;
float *squares,sum;  // sum is never initialized before the += below
fscanf(stdin,"%d",&nmax);
squares = (float*) malloc(nmax*sizeof(float));  // room for nmax floats: valid indices are 0..nmax-1
for (i=1; i<=nmax; i++) {
squares[i] = 1./(i*i);  // at i==nmax this stores one element past the malloc'ed block
sum += squares[i];
}
printf("Sum: %e\n",sum);
//codesnippet end
return 0;
}
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
26 Valgrind output
%% valgrind square1
==53695== Memcheck, a memory error detector
==53695== [stuff]
10
==53695== Invalid write of size 4
==53695== at 0x100000EB0: main (square1.c:10)
==53695== Address 0x10027e148 is 0 bytes after a block of size 40 alloc'd
==53695== at 0x1000101EF: malloc (vg_replace_malloc.c:236)
==53695== by 0x100000E77: main (square1.c:8)
==53695==
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
27 Debugging
Eijkhout: programming
Defensive programming
Debugging
28 Parallel debuggers
Memory debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
29 Buggy code
Eijkhout: programming
Defensive programming
Debugging
30 Parallel inspection
Memory debugging
Parallel Debugging
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
31 Stack trace
Eijkhout: programming
Defensive programming
Debugging
Memory debugging
Parallel Debugging
32 Variable inspection
Eijkhout: programming