C Notes DJG
C Notes DJG
C Notes DJG
Basics of C:
• Types, variables, expressions and statements
• Functions, compilation and the pre-processor
• Pointers and structures
• C assessed exercise (tick) details
C Programming Techniques:
• Pointer manipulation: linked lists, trees, and graph algorithms
• Memory management strategies: ownership and lifetimes,
reference counting, tracing, and arenas
• Cache-aware programming: array-of-struct to struct-of-array
transformations, blocking loops, intrusive data structures
• Unsafe behaviour and mitigations: eg, valgrind, asan, ubsan
oo 1
Course Structure, continued
Course organization:
Recommendations for C:
3
The History of C++
4
C is a high-level language with exposed unsafe and low-level
features.
5
The Classic First Program
Compile with
$ cc example1.c
1 #include <stdio.h>
Execute with:
2
6
Basic Types
7
Constants
8
Defining Constant Values
9
Variables
10
Operators
11
Type Conversion
12
Expressions and Statements
13
Blocks and Compound Statements
14
Variable Definition vs Declaration
15
Scope and Type Example (very nasty)
#include <stdio.h>
int main(void) {
extern unsigned char b; /* is this needed? */
double a = 3.4;
{
extern a; /* is this sloppy? */
printf("%d %d\n",b,a+1); /* what will this print? */
}
return 0;
16
}
Arrays and Strings
17
Control Flow
18
Control Flow and String Example
1 #include <stdio.h>
2 #include <string.h>
3
6 int main(void) {
7 char c;
8 int i, j;
9 for (i=0,j=strlen(s)-1;i<j;i++,j--) { // strlen(s)-1 ?
10 c=s[i], s[i]=s[j], s[j]=c;
11 }
12 printf("%s\n",s);
13 return 0;
14 }
19
Goto (often considered harmful)
20
Programming in C and C++
Lecture 2: Functions and the Preprocessor
1
Functions, continued
2
Function Type Gotchas
3
Recursion
4
Compilation
7
Variable and Function Scope with static
8
Address Space Layout
Description Address
Top of address space 0xffff ffff
...
Stack (downwards-growing) typical start 0x7fff ffff
...
Heap (upwards-growing) typical start 0x0020 0000
...
Static variables typical start 0x0010 0000
C binary code typical start 0x0000 8000
...
Null – often trapped 0x0000 0000
10
Controlling the Preprocessor Programmatically
11
The #include Directive
12
The #define Directive
13
Defining Macros
1 #include <stdio.h>
2
3 #define PI 3.141592654
4 #define MAX(A,B) ((A)>(B)?(A):(B))
5 #define PERCENT(D) (100*D) /* Wrong? */
6 #define DPRINT(D) printf(#D " = %g\n",D)
7 #define JOIN(A,B) (A ## B)
8
9 int main(void) {
10 const unsigned int a1=3;
11 const unsigned int i = JOIN(a,1);
12 printf("%u %g\n",i, MAX(PI,3.14));
13 DPRINT(MAX(PERCENT(0.32+0.16),PERCENT(0.15+0.48)));
14
15 return 0;
16 } 15
Conditional Preprocessor Directives
17
Error control
18
Programming in C and C++
Lecture 3: Pointers and Structures
1
...
Example
38
00
00
00
0x30
0x34
4c
00
00
00
...
0x4c int i
05
42
1c
52
0x60 char c
41
41
Big
...
Little
2
Manipulating pointers
3
Example
1 #include <stdio.h>
2
3 int main(void) {
4 int x=1,y=2;
5 int *pi;
6 int **ppi;
7
13 return 0;
14 }
4
Pointers and arrays
5
Pointer arithmetic
6
Pointer Arithmetic Example
1 #include <stdio.h>
2
3 int main(void) {
4 char str[] = "A string.";
5 char *pc = str;
6
7 printf("%c %c %c\n",str[0],*pc,pc[3]);
8 pc += 2;
9 printf("%c %c %c\n",*pc, pc[2], pc[5]);
10
11 return 0;
12 }
7
Pointers as function arguments
8
Example
9
Arrays of pointers
10
Diagram of Argument List Layout
argv[1] firstarg\0
argc: 3
argv[2] secondarg\0
argv[3] NULL
11
Multi-dimensional arrays
12
Pointers to functions
13
Function Pointer Example
1 #include <stdio.h>
2 #include "example8.h"
3
4 int main(void) {
5 int a[] = {1,4,3,2,5};
6 unsigned int len = 5;
7 sort(a,len,inc); //or sort(a,len,&inc);
8
14 return 0;
15
15 }
The void * pointer
16
Structure declaration
17
Structure definition
18
Member access
19
Self-referential structures
20
Unions
21
Bit fields
22
Example (adapted from K&R)
1
Example
1 int main(void) {
2 int i = 42, j = 28;
3
3
Inline functions
5
Library support: I/O
6
1 #include <stdio.h>
2 #define BUFSIZE 1024
3
4 int main(void) {
5 FILE *fp;
6 char buffer[BUFSIZE];
7
8 if ((fp=fopen("somefile.txt","rb")) == 0) {
9 perror("fopen error:");
10 return 1;
11 }
12
13 while(!feof(fp)) {
14 int r = fread(buffer,sizeof(char),BUFSIZE,fp);
15 fwrite(buffer,sizeof(char),r,stdout);
16 }
17
18 fclose(fp);
19 return 0;
20 }
7
Library support: dynamic memory allocation
8
Gotchas: operator precedence
1 #include <stdio.h>
2
6 int main(void) {
7
8 test_t a,b;
9 test_t *p[] = {&a,&b};
10 p[0]->i=0;
11 p[1]->i=0;
12 test_t *q = p[0];
13
16 return 0;
17 }
9
Gotchas: Increment Expressions
1 #include <stdio.h>
2
3 int main(void) {
4
5 int i=2;
6 int j=i++ + ++i;
7 printf("%d %d\n",i,j); //What does this print?
8
9 return 0;
10 }
1 #include <stdio.h>
2
11 int main(void) {
12
15 return 0;
16 }
11
Gotchas: local stack (contd.)
1 #include <stdio.h>
2
3 char global[10];
4
14 int main(void) {
15 printf("%s\n",unary(6)); //What does this print?
16 return 0;
17 }
12
Gotchas: careful with pointers
1 #include <stdio.h>
2
5 int main(void) {
6 struct values test2 = {2,3};
7 struct values test1 = {0,1};
8
15 return 0;
16 }
13
Gotchas: XKCD pointers
14
Tricks: Duff’s device
16
Exercise aims
18
Hints: IP header
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| IHL |Type of Service| Total Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification |Flags| Fragment Offset |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time to Live | Protocol | Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Destination Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Options | Padding |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
19
Hints: IP header (in C)
1 #include <stdint.h>
2
3 struct ip {
4 uint8_t hlenver;
5 uint8_t tos;
6 uint16_t len;
7 uint16_t id;
8 uint16_t off;
9 uint8_t ttl;
10 uint8_t p;
11 uint16_t sum;
12 uint32_t src;
13 uint32_t dst;
14 };
15
21
Programming in C and C++
Lecture 5: Tooling
1
Undefined and Unspecified Behaviour
2
Tooling and Instrumentation
3
ASan: Address Sanitizer
4
ASan Example #1
1 #include <stdlib.h>
2 #include <stdio.h>
3
5
ASan Example #2
1 #include <stdlib.h>
2
6
ASan Example #3
1 #include <stdlib.h>
2
3 int main(void) {
4 char *s = // 1. array is allocated
5 malloc(sizeof(char) * 10);
6 free(s); // 2. array is freed
7 free(s); // 3. array is double-freed
8 printf("%s", s);
9 return 0;
10 }
7
ASan Limitations
8
MSan: Memory Sanitizer
3 int main(void) {
4 int x[10];
5 printf("%d\n", x[0]); // uninitialized
6 return 0;
7 }
• Accesses to uninitialized variables are undefined
• This does NOT mean that you get some unspecified value
• It means that the compiler is free to do anything it likes
• ASan does not catch uninitialized memory accesses
9
MSan: Memory Sanitizer
1 #include <stdio.h>
2
3 int main(void) {
4 int x[10];
5 printf("%d\n", x[0]); // uninitialized
6 return 0;
7 }
10
MSan Example #1: Stack Allocation
1 #include <stdio.h>
2 #include <stdlib.h> 1. Stack allocate array
3
on line 5
4 int main(int argc, char** argv) {
2. Partially initialize it
5 int a[10];
on line 6
6 a[2] = 0;
3. Access it on line 7
7 if (a[argc])
8 printf("print something\n"); 4. This might or might
9 return 0; not be initialized
10 }
11
MSan Example #2: Heap Allocation
1 #include <stdio.h>
2 #include <stdlib.h>
3
1. Heap allocate array
4 int main(int argc, char** argv) { on line 5
5 int *a = malloc(sizeof(int) * 10); 2. Partially initialize it
6 a[2] = 0; on line 6
7 if (a[argc]) 3. Access it on line 7
8 printf("print something\n"); 4. This might or might
9 free(a); not be initialized
10 return 0;
11 }
12
MSan Limitations
13
UBSan: Undefined Behaviour Sanitizer
14
UBSan Example #1
1 #include <limits.h>
2 1. Signed integer overflow is
3 int main(void) { undefined
4 int n = INT_MAX; 2. So value of m is undefined
5 int m = n + 1; 3. Compile with
6 return 0; -fsanitize=undefined
7 }
15
UBSan Example #2
1 #include <limits.h>
2
1. Division-by-zero is undefined
3 int main(void) {
2. So value of m is undefined
4 int n = 65;
5 int m = n / (n - n); 3. Any possible behaviour is
6 return 0; legal!
7 }
16
UBSan Example #3
1 #include <stdlib.h>
2
17
UBSan Limitations
18
Valgrind
19
Valgrind Example
20
Valgrind Limitations
21
Summary
22
Programming in C and C++
Lecture 6: Aliasing, Graphs, and Deallocation
1 / 20
The C API for Dynamic Memory Allocation
2 / 20
One Deallocation Per Path
1 #include <stdio.h>
2 #include <stdlib.h>
3
4 int main(void) {
5 int *pi = malloc(sizeof(int));
6 scanf("%d", pi); // Read an int
7 if (*pi % 2) {
8 printf("Odd!\n");
9 free(pi); // WRONG!
10 }
11 }
3 / 20
One Deallocation Per Path
1 #include <stdio.h>
2 #include <stdlib.h>
3
4 int main(void) {
5 int *pi = malloc(sizeof(int));
6 scanf("%d", pi); // Read an int
7 if (*pi % 2) {
8 printf("Odd!\n");
9 free(pi); // WRONG!
10 }
11 }
• This code fails to deallocate pi if *pi is even
3 / 20
One Deallocation Per Path
1 #include <stdio.h>
2 #include <stdlib.h>
3
4 int main(void) {
5 int *pi = malloc(sizeof(int));
6 scanf("%d", pi); // Read an int
7 if (*pi % 2) {
8 printf("Odd!\n");
9 }
10 free(pi); // OK!
11 }
• The previous version failed to deallocate pi if *pi is even
• Moving it ensures it always runs
3 / 20
A Tree Data Type
1 struct node {
2 int value;
3 struct node *left;
4 struct node *right;
5 };
6 typedef struct node Tree;
4 / 20
A Tree Data Type
1 // Initialize node2
2 Tree *node2 = node(2, NULL, NULL);
3
4 // Initialize node1
5 Tree *node1 = node(1, node2, node2); // node2 repeated!
6
6 / 20
The shape of the graph
node1
left right
• node1 has two pointers to
node2
node2 • This is a directed acyclic
graph, not a tree.
• tree_free(node1) will call
left right
tree_free(node2) twice!
Null Null
7 / 20
Evaluating free(node1)
node1
1 tree_free(node1);
left right
node2
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 if (node1 != NULL) {
left right 2 tree_free(node1->left);
3 tree_free(node1->right);
4 free(node1);
node2 5 }
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 tree_free(node1->left);
left right 2 tree_free(node1->right);
3 free(node1);
node2
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 tree_free(node2);
left right 2 tree_free(node2);
3 free(node1);
node2
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 if (node2 != NULL) {
left right 2 tree_free(node2->left);
3 tree_free(node2->right);
4 free(node2);
node2 5 }
6 tree_free(node2);
7 free(node1);
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 tree_free(node2->left);
left right 2 tree_free(node2->right);
3 free(node2);
4 tree_free(node2);
node2 5 free(node1);
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 tree_free(NULL);
left right 2 tree_free(NULL);
3 free(node2);
4 tree_free(node2);
node2 5 free(node1);
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 if (NULL != NULL) {
left right 2 tree_free(NULL->left);
3 tree_free(NULL->right);
4 free(NULL);
node2 5 }
6 tree_free(NULL);
7 free(node2);
left right 8 tree_free(node2);
9 free(node1);
Null Null
8 / 20
Evaluating free(node1)
node1
1 tree_free(NULL);
left right 2 free(node2);
3 tree_free(node2);
4 free(node1);
node2
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 free(node2);
left right 2 tree_free(node2);
3 free(node1);
node2
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 free(node2);
left right 2 free(node2);
3 free(node1);
node2
left right
Null Null
8 / 20
Evaluating free(node1)
node1
1 free(node2);
left right 2 free(node2);
3 free(node1);
node2 is freed twice!
node2
left right
Null Null
8 / 20
A Tree Data Type which Tracks Visits
1 struct node {
2 bool visited;
3 int value;
4 struct node *left;
5 struct node *right;
6 };
7 typedef struct node Tree;
9 / 20
Creating Nodes of Tree Type
10 / 20
Freeing Nodes of Tree Type, Part 1
14 / 20
Arenas
1 struct node {
2 int value;
3 struct node *left;
4 struct node *right;
5 };
6 typedef struct node Tree;
15 / 20
Arenas
18 / 20
Example
1 arena_t a = make_arena(BIG_NUMBER);
2
19 / 20
Conclusion
20 / 20
Programming in C and C++
Lecture 7: Reference Counting and Garbage Collection
1 / 30
The C API for Dynamic Memory Allocation
2 / 30
A Tree Data Type
1 struct node {
2 int value;
3 struct node *left;
4 struct node *right;
5 };
6 typedef struct node Tree;
3 / 30
Construct Nodes of a Tree
1. We allocate n on line 1
2. On line 2, we create n2 whose left and right fields are n.
3. Hence n2->left and n2->right are said to alias – they are
two pointers aimed at the same block of memory.
5 / 30
The shape of the graph
n2
Null Null
6 / 30
The Idea of Reference Counting
n2: k
7 / 30
How Reference Counting Works
n2: k
1. We start with k references
to n2
left right
n: 2
left right
Null Null
8 / 30
How Reference Counting Works
n2: 0
1. We start with k references
to n2
left right
2. Eventually k becomes 0
n: 2
left right
Null Null
8 / 30
How Reference Counting Works
n2: 0
1. We start with k references
to n2
left right
2. Eventually k becomes 0
3. It’s time to delete n2
n: 2
left right
Null Null
8 / 30
How Reference Counting Works
n2: 0
1. We start with k references
to n2
left right
2. Eventually k becomes 0
3. It’s time to delete n2
n: 2
4. Decrement the reference
count of each thing n2
left right points to
Null Null
8 / 30
How Reference Counting Works
n2: 0
1. We start with k references
to n2
left right
2. Eventually k becomes 0
3. It’s time to delete n2
n: 1
4. Decrement the reference
count of each thing n2
left right points to
Null Null
8 / 30
How Reference Counting Works
n2: 0
1. We start with k references
to n2
left right
2. Eventually k becomes 0
3. It’s time to delete n2
n: 0
4. Decrement the reference
count of each thing n2
left right points to
5. Then delete n2
Null Null
8 / 30
How Reference Counting Works
n2: 0
1. We start with k references
to n2
left right
2. Eventually k becomes 0
3. It’s time to delete n2
n: 0
4. Decrement the reference
count of each thing n2
left right points to
5. Then delete n2
Null Null
8 / 30
How Reference Counting Works
n2: 0
1. We start with k references
to n2
left right
2. Eventually k becomes 0
3. It’s time to delete n2
n: 0
4. Decrement the reference
count of each thing n2
left right points to
5. Then delete n2
6. Recursively delete n
Null Null
8 / 30
The Reference Counting API
1 struct node {
2 unsigned int rc; • We add a field rc to keep
3 int value; track of the references.
4 struct node *left;
• We keep the same node
5 struct node *right;
6 }; constructor interface.
7 typedef struct node Node; • We add a procedure
8 inc_ref to increment the
9 const Node *empty = NULL; reference count of a node.
10 Node *node(int value,
11 Node *left, • We add a procedure
12 Node *right); dec_ref to decrement the
13 void inc_ref(Node *node); reference count of a node.
14 void dec_ref(Node *node);
9 / 30
Reference Counting Implementation: node()
11 / 30
Reference Counting Implementation: dec_ref()
12 / 30
Example 1
• complete(n) builds a
complete binary tree of
1 Node *complete(int n) { depth n
2 if (n == 0) { • Sharing makes memory
3 return empty; usage O(n)
4 } else {
5 Node *sub = complete(n-1); • On line 5, makes a recursive
6 Node *result = call to build subtree.
7 node(n, sub, sub); • On line 6, builds the tree
8 dec_ref(sub);
• On line 8, call
9 return result;
10 } dec_ref(sub) to drop the
11 } stack reference sub
• On line 9, don’t call
dec_ref(result)
13 / 30
Example 1 – mistake 1
• If we forget to call
dec_ref(sub), we get a
1 Node *complete(int n) {
memory leak!
2 if (n == 0) {
3 return empty; • sub begins with a refcount
4 } else { of 1
5 Node *sub = complete(n-1);
• node(n, sub, sub) bumps it
6 Node *result =
to 3
7 node(n, sub, sub);
8 // dec_ref(sub); • If we call
9 return result; dec_ref(complete(n)),
10 } the outer node will get freed
11 }
• But the children will end up
with an rc field of 1
14 / 30
Example 1 – mistake 2
15 / 30
Design Issues with Reference Counting APIs
16 / 30
Mitigations: Careful Use of Getters and Setters
17 / 30
Cycles: A Fundamental Limitation on Reference Counting
1 Node *foo() {
2 Node *n1 = node(1, NULL, NULL);
3 Node *n2 = node(2, NULL, NULL);
4 set_left(n1, n2);
5 set_left(n2, n1);
6 dec_ref(n2);
7 return n1;
8 }
18 / 30
A Cyclic Object Graph
19 / 30
Garbage Collection: Dealing with Cycles
20 / 30
GC API – Data structures
1 struct node {
2 int value;
3 struct node *left; • Node * are node objects,
4 struct node *right;
5 bool mark; but augmented with a mark
6 struct node *next; bit (Lab 5) and a next link
7 };
8 typedef struct node Node; connecting all allocated
9 nodes
10 struct root {
11 Node *start; • A Root * is a node we don’t
12 struct root *next; want to garbage collect.
13 };
14 typedef struct root Root; Roots are also in a linked list
15
• An allocator Alloc * holds
16 struct alloc {
17 Node *nodes; the head of the lists of
18 Root *roots; nodes and roots
19 };
20 typedef struct alloc Alloc;
21 / 30
GC API – Procedures
• make_allocator creates a
fresh allocator
1 Alloc *make_allocator(void); • node(n, l, r, a) creates
2 Node *node(int value,
3 Node *left, a fresh node in allocator a
4 Node *right, (as in the arena API)
5 Alloc *a);
6 Root *root(Node *node, Alloc *a); • root(n) creates a new root
7 void gc(Alloc *a); object rooting the node n
• gc(a) frees all nodes
unreachable from the roots
22 / 30
Creating a Fresh Allocator
1 Alloc *make_allocator(void) {
2 Alloc *a = malloc(sizeof(Alloc));
3 a->roots = NULL;
4 a->nodes = NULL;
5 return a;
6 }
23 / 30
Creating a Node
25 / 30
Implementing a Mark-and-Sweep GC
26 / 30
Marking
29 / 30
Design Considerations
30 / 30
Programming in C and C++
Lecture 8: The Memory Hierarchy and Cache Optimization
1 / 22
Three Simple C Functions
3 / 22
Latencies in the Memory Hierarchy
4 / 22
How Caches Work
5 / 22
Locality: Taking advantage of caching
6 / 22
Pointers Are Expensive
class List<T> {
public T head;
public List<T> tail;
7 / 22
Pointers Are Expensive in C, too
struct data {
int i;
double d;
char c;
};
typedef struct data Data;
struct List {
Data *head;
struct List *tail;
};
9 / 22
Technique #1: Intrusive Lists
11 / 22
Technique #2: Lists of Structs to Arrays of Structs
13 / 22
Technique #3: Arrays of Structs to Struct of Arrays
14 / 22
Technique #3: Traversing Struct of Arrays
15 / 22
Technique #4: Loop Blocking
16 / 22
How Matrices are Laid out in Memory
$$A = \begin{pmatrix} 0 & 1 & 4 \\ 9 & 16 & 25 \\ 36 & 49 & 64 \\ 81 & 100 & 121 \end{pmatrix}$$
Address 0 1 2 3 4 5 6 7 8 9 10 11
Value 0 1 4 9 16 25 36 49 64 81 100 121
• A is a 4 × 3 array (4 rows, 3 columns), stored row by row.
• A(i, j) is at address 3 × i + j (0 based!)
• E.g., A(2, 1) = 49, at address 7
• E.g., A(3, 1) = 100, at address 10
17 / 22
Loop Blocking
18 / 22
How to Block a Loop, Concept
Traversing A Traversing B
a b c d a b c d
e f g h e f g h
i j k l i j k l
m n o p m n o p
19 / 22
How to Block a Loop, Concept
Traversing A Traversing B
a b c d a b c d
e f g h e f g h
i j k l i j k l
m n o p m n o p
20 / 22
Loop Blocking
21 / 22
Conclusion
22 / 22
Programming in C and C++
Lecture 9: Debugging
1 / 34
What is Debugging?
2 / 34
The Runtime Debugging Process
Unit tests are short code fragments written to test code modules in
isolation, typically written by the original developer.
Regression testing checks that changes do not introduce new bugs,
for example by causing unit tests to fail in an unrelated component.
3 / 34
What is a Bug?
1 #include <netdb.h>
2 #include <stdio.h>
3
Just fix the code to add a NULL pointer check before printing hp.
6 / 34
Debugging via printing values
1 #include <netdb.h>
2 #include <stdio.h>
3
Executing this will always show the output as the program runs.
1 #include <netdb.h>
2 #include <stdio.h>
3
9 / 34
Debugging via printing values (cont.)
The source code is now very ugly and littered with debugging
statements. The C preprocessor comes to the rescue.
• Define a DEBUG parameter to compile your program with.
• #define a debug_printf macro that only runs if DEBUG is non-zero.
• Disabling DEBUG means debugging calls will be optimised
away at compile time.
1 #ifndef DEBUG
2 #define DEBUG 0
3 #endif
4 #define debug_printf(fmt, ...) \
5 do { if (DEBUG) { \
6 fprintf(stderr, fmt, __VA_ARGS__); \
7 fflush(stderr); } } \
8 while (0)
10 / 34
Debugging via printing values (cont.)
1 #include <netdb.h>
2 #include <stdio.h>
3 #ifndef DEBUG
4 #define DEBUG 0
5 #endif
6 #define debug_printf(fmt, ...) \
7 do { if (DEBUG) { fprintf(stderr, fmt, __VA_ARGS__); \
8 fflush(stderr); } } while (0)
9 int main(int argc, char **argv) {
10 debug_printf("argc: %d\n", argc);
11 for (int i=1; i<argc; i++) {
12 struct hostent *hp = gethostbyname(argv[i]);
13 debug_printf("hp: %p\n", hp);
14 printf("%s\n", hp->h_name);
15 }
16 return 0;
17 }
11 / 34
Debugging via Assertions
1 #include <netdb.h>
2 #include <stdio.h>
3 #include <assert.h> // new header file
4
13 / 34
Debugging via Assertions (cont.)
14 / 34
Fault Isolation
15 / 34
Reproducing the Bug
1 #include <netdb.h>
2 #include <stdio.h>
3 #include <assert.h>
4 int main(int argc, char **argv) {
5 struct hostent *hp;
6 hp = gethostbyname(argv[1]);
7 printf("%s\n", hp->h_name);
8 return 0;
9 }
./lookup
Segmentation fault: 11
./lookup doesntexist.abc
Segmentation fault: 11
./lookup www.recoil.org
bark.recoil.org
Both positive and negative results are important to give you more hints
about how many distinct bugs there are, and where their source is.
17 / 34
Isolating the Bug
We now know of two failing inputs, but need to figure out where in
the source code the defect is. From earlier, one solution is to put
assert statements everywhere that we suspect could have a failure.
1 #include <netdb.h>
2 #include <stdio.h>
3 #include <assert.h>
4 int main(int argc, char **argv) {
5 struct hostent *hp;
6 assert(argv[1] != NULL);
7 hp = gethostbyname(argv[1]);
8 assert(hp != NULL);
9 printf("%s\n", hp->h_name);
10 return 0;
11 }
18 / 34
Reproducing the Bug with Assertions
19 / 34
Reproducing the Bug with Assertions (cont.)
./lookup www.recoil.org
bark.recoil.org
The assertions show that there are two distinct failure points in
the application, triggered by two separate inputs.
20 / 34
Using Debugging Tools
Let’s use the lldb debugger from LLVM to find the runtime failure
without requiring assertions.
cc -Wall -o lookup -DNDEBUG -g debug-s18.c
Run the binary using lldb instead of executing it directly.
lldb ./lookup
(lldb) target create "./lookup"
Current executable set to './lookup' (x86_64).
Now try running the program with inputs that trigger a crash:
(lldb) run doesntexist.abc
frame #0: 0x0000000100000f52 lookup
main(argc=2, argv=0x00007fff5fbff888) + 50 at debug-s18.c:12
9 assert(argv[1] != NULL);
10 hp = gethostbyname(argv[1]);
11 assert(hp != NULL);
-> 12 printf("%s\n", hp->h_name);
13
return 0;
The program has halted at line 12 and lets us inspect the value of
variables that are in scope, confirming that the hp pointer is NULL.
(lldb) print hp
(hostent *) $1 = 0x0000000000000000
23 / 34
Example: Using lldb from LLVM (cont.)
The program has run until the main function is encountered, and
stopped at the first statement.
24 / 34
Example: Using lldb from LLVM (cont.)
We can thus:
How did the debugger find the source code in the compiled
executable? Compile it without the -g flag to see what happens.
cc -Wall -DNDEBUG debug-s18.c
(lldb) run doesnotexist.abc
loader`main + 50:
-> 0x100000f52: movq (%rax), %rsi
0x100000f55: movb $0x0, %al
0x100000f57: callq 0x100000f72 ; symbol stub for: printf
0x100000f5c: movl $0x0, %ecx
27 / 34
Debugging Symbols (cont.)
28 / 34
Debugging Symbols (cont.)
lldb is just one of a suite of debugging tools that are useful in bug
hunting.
30 / 34
Unit and Regression Test
1 #include <stdio.h>
2 #include <netdb.h>
3 #include <assert.h>
4 void lookup(char *buf) {
5 assert(buf != NULL);
6 struct hostent *hp = gethostbyname(buf);
7 printf("%s -> %s\n", buf, hp ? hp->h_name : "unknown");
8 }
9 void lookup_test(void) {
10 lookup("google.com");
11 lookup("doesntexist.abc");
12 lookup("");
13 lookup(NULL);
14 }
31 / 34
Unit and Regression Test (cont.)
1 #include <stdlib.h>
2
Can now run this code as a test case or for live lookups.
33 / 34
Unit and Regression Tests (cont.)