Skip to main content
Compilation Toolchain
Compilation Toolchain
IOT
2h

Compilation: Theory

This course covers compilation theory, libraries, debugging, and automation tools essential for embedded systems development.

Introduction

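| Program       | Software / App           |
|---------------|--------------------------|
| 1/2 devs      | > 10 devs                |
| 1/2 files     | > 1k files               |
| 500 LoCs      | 1M LoCs                  |
| 1 arch.       | n arch.                  |
| 1 version     | n versions + m patches   |
| no deps       | libs + deps              |
| easy to debug | really not easy          |
| 10s to build  | 4h to build              |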

Consequences

  • How to work simultaneously on the same code?
  • How to keep and maintain several versions of your code?
  • How to make debugging easier and systematic?
  • How to automate / optimize compilation and build times?

Outline

  1. Compilation (theory)
  2. Compilation (practical)
  3. Libraries
  4. Code Quality / Debug

Compilation (Theory)

From Source to Executable

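| Expectation      | Reality              |
|------------------|----------------------|
| Source file (.c) | Source file (.c)     |
|                  | Pre-processor        |
|                  | Abstract Syntax Tree |
|                  | Intermediate Code    |
|                  | Assembler Code       |
|                  | Binary Code          |
|                  | Link Editor          |
| Executable       | Executable           |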

Preprocessing

The preprocessor:

  • Replaces #defines with actual values / instructions
  • Copies included files
gcc -E my_prog.c

Preprocessing - Example

util.h

int compare(int a, int b)   /* three-way comparison of a and b */
{
    if (a == b) return 0;   /* equal */
    if (a < b)  return 1;   /* a is smaller: note that this returns +1 */
    return -1;              /* a is larger */
}

my_prog.c

#include "util.h"

#define N 20
#define M 35

int main()
{
    int ret_value = compare(N, M);  /* N and M are replaced by 20 and 35 by the preprocessor */
    return ret_value;               /* main returns the comparison result */
}

my_prog.e (after preprocessing)

# 1 "<built-in>" 2
# 1 "my_prog.c" 2
# 1 "./util.h" 1
int compare(int a, int b)
{
    if (a == b) return 0;
    if (a < b) return 1;
    return -1;
}
# 2 "my_prog.c" 2
int main()
{
    int ret_value = compare(20, 35);
    return ret_value;
}

Abstract Syntax Tree (AST)

The AST:

  • Is language independent
  • Allows type verification
  • Enables some optimizations
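clang -Xclang -ast-dump -fsyntax-only my_prog.c
(These are Clang options; they work through `gcc` only where `gcc` is an alias for Clang, e.g. on macOS.)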

AST - Example

[...]
|-FunctionDecl 0x7fd2be01a970 <./util.h:1:1, line:6:1> line:1:5 used compare 'int (int, int)'
| |-ParmVarDecl 0x7fd2be01a818 <col:13, col:17> col:17 used a 'int'
| |-ParmVarDecl 0x7fd2be01a898 <col:20, col:24> col:24 used b 'int'
| `-CompoundStmt 0x7fd2be01ac60 <line:2:1, line:6:1>
|   |-IfStmt 0x7fd2be01ab28 <line:3:5, col:24>
|   | |-BinaryOperator 0x7fd2be01aad8 <col:9, col:14> 'int' '=='
|   | | |-ImplicitCastExpr 0x7fd2be01aaa8 <col:9> 'int' <LValueToRValue>
|   | | | `-DeclRefExpr 0x7fd2be01aa68 <col:9> 'int' lvalue ParmVar 0x7fd2be01a818 'a' 'int'
|   | | `-ImplicitCastExpr 0x7fd2be01aac0 <col:14> 'int' <LValueToRValue>
|   | |   `-DeclRefExpr 0x7fd2be01aa88 <col:14> 'int' lvalue ParmVar 0x7fd2be01a898 'b' 'int'
|   | `-ReturnStmt 0x7fd2be01ab18 <col:17, col:24>
|   |   `-IntegerLiteral 0x7fd2be01aaf8 <col:24> 'int' 0
[...]

Intermediate Code

  • Transforms AST into ASM for an ideal computer (unlimited registers, memory, no latency…)
  • ASM code is very simple and easy to read
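clang -S -emit-llvm my_prog.c
(`-emit-llvm` is a Clang option; it works through `gcc` only where `gcc` is an alias for Clang, e.g. on macOS.)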

Intermediate Code - Example

; ModuleID = 'my_prog.c'
source_filename = "my_prog.c"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx11.0.0"
; Function Attrs: noinline nounwind optnone ssp uwtable
; compare(a, b): %0 holds a and %1 holds b.
; %3 is the stack slot for the return value; %4 and %5 are stack copies of a and b.
define i32 @compare(i32 %0, i32 %1) #0 {
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca i32, align 4
  store i32 %0, i32* %4, align 4
  store i32 %1, i32* %5, align 4
  %6 = load i32, i32* %4, align 4
  %7 = load i32, i32* %5, align 4
  ; first test: a == b ?
  %8 = icmp eq i32 %6, %7
  br i1 %8, label %9, label %10

; a == b: return value = 0
9:                                                ; preds = %2
  store i32 0, i32* %3, align 4
  br label %16

; a != b: second test, a < b as a signed comparison (slt)
10:                                               ; preds = %2
  %11 = load i32, i32* %4, align 4
  %12 = load i32, i32* %5, align 4
  %13 = icmp slt i32 %11, %12
  br i1 %13, label %14, label %15

; a < b: return value = 1
14:                                               ; preds = %10
  store i32 1, i32* %3, align 4
  br label %16

; a > b: return value = -1
15:                                               ; preds = %10
  store i32 -1, i32* %3, align 4
  br label %16

; common exit: reload the return value from its slot and return it
16:                                               ; preds = %15, %14, %9
  %17 = load i32, i32* %3, align 4
  ret i32 %17
}
; Function Attrs: noinline nounwind optnone ssp uwtable
; main(): calls compare(20, 35), stores the result in the slot %2 and returns it.
define i32 @main() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  ; N and M no longer exist here: they already appear as the constants 20 and 35
  %3 = call i32 @compare(i32 20, i32 35)
  ; ret_value = result of the call
  store i32 %3, i32* %2, align 4
  %4 = load i32, i32* %2, align 4
  ret i32 %4
}

Assembler Code

  • Transforms IR into target architecture instructions
  • Requires excellent knowledge of hardware
gcc -S my_prog.c

Assembler Code - Example

my_prog.s

    .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 11, 0 sdk_version 11, 1
    .globl  _compare   ## -- Begin function compare
    .p2align    4, 0x90
## int compare(int a, int b)
## AT&T syntax: operands are written `source, destination`.
## In:  %edi = a, %esi = b        Out: %eax = 0, 1 or -1
## Stack slots: -4(%rbp) = return value, -8(%rbp) = a, -12(%rbp) = b
_compare:              ## @compare
    .cfi_startproc
## %bb.0:
    pushq   %rbp                    ## save the caller's frame pointer
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp              ## set up this function's frame pointer
    .cfi_def_cfa_register %rbp
    movl    %edi, -8(%rbp)          ## store a
    movl    %esi, -12(%rbp)         ## store b
    movl    -8(%rbp), %eax          ## eax = a
    cmpl    -12(%rbp), %eax         ## compare a with b
    jne LBB0_2                      ## a != b: go to the second test
## %bb.1:
    movl    $0, -4(%rbp)            ## a == b: return value = 0
    jmp LBB0_5
LBB0_2:
    movl    -8(%rbp), %eax          ## eax = a
    cmpl    -12(%rbp), %eax         ## compare a with b again
    jge LBB0_4                      ## signed a >= b: skip the "a < b" case
## %bb.3:
    movl    $1, -4(%rbp)            ## a < b: return value = 1
    jmp LBB0_5
LBB0_4:
    movl    $-1, -4(%rbp)           ## a > b: return value = -1
LBB0_5:
    movl    -4(%rbp), %eax          ## load the return value into %eax
    popq    %rbp                    ## restore the caller's frame pointer
    retq
    .cfi_endproc
                   ## -- End function
    .globl  _main      ## @main
    .p2align    4, 0x90
## int main(void): calls compare(20, 35) and returns its result in %eax.
## Stack slots: -8(%rbp) = ret_value; -4(%rbp) is set to 0 and not read again here.
_main:             ## @main
    .cfi_startproc
## %bb.0:
    pushq   %rbp                    ## save the caller's frame pointer
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp              ## set up this function's frame pointer
    .cfi_def_cfa_register %rbp
    subq    $16, %rsp               ## reserve 16 bytes of stack for locals
    movl    $0, -4(%rbp)
    movl    $20, %edi               ## first argument: N = 20
    movl    $35, %esi               ## second argument: M = 35
    callq   _compare
    movl    %eax, -8(%rbp)          ## ret_value = compare(20, 35)
    movl    -8(%rbp), %eax          ## return ret_value
    addq    $16, %rsp               ## release the 16 bytes reserved above
    popq    %rbp                    ## restore the caller's frame pointer
    retq
    .cfi_endproc
                   ## -- End function
.subsections_via_symbols

Binary Code

Transforms ASM into binary object:

as my_prog.s -o my_prog.o

Binary Code - Example

my_prog.o

0000000 cf fa ed fe 07 00 00 01 03 00 00 00 01 00 00 00
0000010 04 00 00 00 b8 01 00 00 00 20 00 00 00 00 00 00
0000020 19 00 00 00 38 01 00 00 00 00 00 00 00 00 00 00
...
0000330 50 00 00 00 00 00 00 00 00 5f 6d 61 69 6e 00 5f
0000340 63 6f 6d 70 61 72 65 00

Binary - String Example

str_ex.c

/* Defines a string so that its bytes can be located in the object file. */
int main()
{
    const char line[] = "Hello SE3 !";  /* 11 characters plus the terminating NUL byte */
    return 0;
}

str_ex.o (partial)

0000280 65 6c 6c 6f 20 53 45 33 20 21 00 00 00 00 00 00

Verification:

echo "Hello SE3 \!"  | hexdump
# Output: 0000000 48 65 6c 6c 6f 20 53 45 33 20 21 0a

Link Editor

The link editor (linker) is required if you have:

  • Multiple object files
  • Libraries to link

ld_util.c

/*
 * Three-way comparison of two integers.
 * Returns 0 when a == b, 1 when a < b, and -1 when a > b.
 */
int compare(int a, int b)
{
    if (a == b) return 0;
    if (a < b)  return 1;
    return -1;
}

ld_main.c

#define N 20
#define M 35
/* Declaration only: no definition of compare() appears in this file,
 * so the symbol is left for the linker to resolve. */
int compare(int, int);

/* Calls compare(N, M) and returns its result. */
int main()
{
    int ret_value = compare(N, M);
    return ret_value;
}

Compiling ld_main.c alone fails:

$ gcc ld_main.c
Undefined symbols for architecture x86_64:
  "_compare", referenced from:
      _main in ld_main-f1b913.o
ld: symbol(s) not found for architecture x86_64
clang: error: linker command failed with exit code 1
                    (use -v to see invocation)

The solution is to compile and link both files:

$ gcc -c ld_main.c
$ gcc -c ld_util.c
$ nm ld_main.o
                 U _compare
0000000000000000 T _main
$ nm ld_util.o
0000000000000000 T _compare
$ gcc ld_main.o ld_util.o
$ nm a.out
0000000100000000 T __mh_execute_header
0000000100003f70 T _compare
0000000100003f40 T _main
                 U dyld_stub_binder

Take Away Messages

Compilation Errors
  • Program compilation is a complex process with lots of steps
  • "implicit declaration of function" or "use of undeclared identifier" errors are reported by the compiler when a name is used without a visible declaration, usually because of a missing #include → check your includes or definitions
  • "Undefined symbols" or "undefined reference" errors are reported by the linker → check your compilation commands and add the needed libraries or object files