Summary
- How to mix functions you write in C with code you generate in LLVM IR
- Using the C compiler to go “beyond our current capabilities” in LLVM
- Breaking down a hard problem into a more manageable one with the use of functions
Helping LLVM with C
Reading and printing a string
Let’s start with a seemingly simple goal: write an LLVM IR program that reads a string from standard input, and then just prints it out.
Last week, we talked about the idea of writing C code first to do what
you want, then using the -S -emit-llvm flags to the clang compiler
to see what you get.
Here is a C program that does what we want:
#include <stdio.h>
#include <stdlib.h>
// Prompt for a string, read one whitespace-delimited word into a heap buffer, and echo it back.
int main() {
// we'll assume for now that no one will type something longer than 255 characters
// (%255s stores at most 255 characters, leaving room for the terminating '\0' in the 256-byte buffer)
char* str = malloc(256);
printf("Enter a string: ");
scanf(" %255s", str);
printf("Here's what you entered: %s\n", str);
free(str);
return 0;
}
Compiling this with clang -S -emit-llvm produces an .ll file that has this:
; ModuleID = 'readwrite.c'
source_filename = "readwrite.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@.str = private unnamed_addr constant [17 x i8] c"Enter a string: \00", align 1
@.str.1 = private unnamed_addr constant [7 x i8] c" %255s\00", align 1
@.str.2 = private unnamed_addr constant [29 x i8] c"Here's what you entered: %s\0A\00", align 1
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
;; %1 is only ever written (with 0) and never read here -- presumably clang's slot for main's return value
;; %2 is the stack slot holding the C variable `str`
%1 = alloca i32, align 4
%2 = alloca ptr, align 8
store i32 0, ptr %1, align 4
;; str = malloc(256);
%3 = call noalias ptr @malloc(i64 noundef 256) #4
store ptr %3, ptr %2, align 8
;; printf("Enter a string: ");
%4 = call i32 (ptr, ...) @printf(ptr noundef @.str)
;; scanf(" %255s", str);  -- `str` is re-loaded from its stack slot before every use
;; note that the C call to scanf shows up here under the name @__isoc99_scanf
%5 = load ptr, ptr %2, align 8
%6 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str.1, ptr noundef %5)
;; printf("Here's what you entered: %s\n", str);
%7 = load ptr, ptr %2, align 8
%8 = call i32 (ptr, ...) @printf(ptr noundef @.str.2, ptr noundef %7)
;; free(str);
%9 = load ptr, ptr %2, align 8
call void @free(ptr noundef %9) #5
;; return 0;
ret i32 0
}
; Function Attrs: nounwind allocsize(0)
declare noalias ptr @malloc(i64 noundef) #1
declare i32 @printf(ptr noundef, ...) #2
declare i32 @__isoc99_scanf(ptr noundef, ...) #2
; Function Attrs: nounwind
declare void @free(ptr noundef) #3
; ... and then a whole bunch more attributes
By now, you should be able to shrink this down a bit and simplify it to the bare essentials:
;; String constants passed to printf/scanf; each [N x i8] length includes the trailing \00 byte
@.str = private unnamed_addr constant [17 x i8] c"Enter a string: \00"
@.str.1 = private unnamed_addr constant [7 x i8] c" %255s\00"
@.str.2 = private unnamed_addr constant [29 x i8] c"Here's what you entered: %s\0A\00"
;; main: prompt, read one word into a 256-byte heap buffer, print it back, free the buffer
define i32 @main() {
;; %reg1 is written with 0 but never read; %reg2 is the stack slot that holds the buffer pointer
%reg1 = alloca i32
%reg2 = alloca ptr
store i32 0, ptr %reg1
;; allocate the 256-byte buffer and remember its address in %reg2
%reg3 = call ptr @malloc(i64 256)
store ptr %reg3, ptr %reg2
;; print the prompt
%reg4 = call i32 (ptr, ...) @printf(ptr @.str)
;; read the input into the buffer (the pointer is re-loaded from %reg2 before each use)
%reg5 = load ptr, ptr %reg2
%reg6 = call i32 (ptr, ...) @scanf(ptr @.str.1, ptr %reg5)
;; print the buffer back out
%reg7 = load ptr, ptr %reg2
%reg8 = call i32 (ptr, ...) @printf(ptr @.str.2, ptr %reg7)
;; release the buffer and return 0
%reg9 = load ptr, ptr %reg2
call void @free(ptr %reg9)
ret i32 0
}
;; External functions called above; they are only declared here, not defined in this file
declare ptr @malloc(i64)
declare i32 @printf(ptr, ...)
declare i32 @scanf(ptr, ...)
declare void @free(ptr)
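Notice that I changed the register names from numeric ones %1 etc. to something
slightly different %reg1 etc. This is really helpful when we end up editing this
LLVM code! Numeric register names have to be numbered consecutively in order, so
inserting a single new instruction in the middle would force us to renumber every
register after it; named registers like %reg1 have no such restriction.
(I also wrote the call as plain @scanf instead of the @__isoc99_scanf name that
clang emitted.)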
And the code above does work.
You can try saving this as a .ll file and then either compiling with clang
or running it directly with lli. And even if you couldn’t write this
yourself from scratch, you should be able to understand every line in
the above LLVM program.
Adding some more functionality
Now what if we want to extend this program to do something more useful? Like say, changing all the lowercase letters to uppercase ones.
We could surely repeat the same process we just did, adding onto our C program, copying out the results, maybe trying to strip things down or simplify it, but it’s going to get painful. For one thing, writing this new program requires some LLVM skills that we haven’t even talked about yet, like loops and if statements. What can we do?
The trick here is to write a function in C that does exactly what we
want. Then we can compile that, directly copy-paste the whole function
definition, and just call that function from our main without having
to get into the details of how it works in LLVM.
First step: write your function in a standalone .c file:
#include <string.h>
// Convert every lowercase letter 'a'..'z' in str to its uppercase form, in place.
void shout(char* str) {
// strlen's result is stored in an int here, which is why the LLVM output contains a trunc from i64 to i32
int n = strlen(str);
for (int i = 0; i < n; ++i) {
if (str[i] >= 'a' && str[i] <= 'z') {
// 'A' - 'a' is the offset from a lowercase letter to the matching uppercase letter
str[i] += ('A' - 'a');
}
}
}
Second step: Compile just that one function using clang -S -emit-llvm.
Notice that we couldn’t even compile this to a full program if we wanted
to, since there’s no main! We’re just going to get this one function’s
definition in the resulting .ll file, which should look something like
this:
; ModuleID = 'shout.c'
source_filename = "shout.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @shout(ptr noundef %0) #0 {
;; stack slots for the C variables: %2 holds str, %3 holds n, %4 holds i
%2 = alloca ptr, align 8
%3 = alloca i32, align 4
%4 = alloca i32, align 4
;; save the incoming argument (%0) into str's slot
store ptr %0, ptr %2, align 8
;; n = strlen(str), narrowed from i64 to i32 because n is declared as int
%5 = load ptr, ptr %2, align 8
%6 = call i64 @strlen(ptr noundef %5) #2
%7 = trunc i64 %6 to i32
store i32 %7, ptr %3, align 4
;; i = 0, then jump to the block that tests the loop condition
store i32 0, ptr %4, align 4
br label %8
8: ; preds = %38, %1
;; loop condition: i < n (signed comparison)
%9 = load i32, ptr %4, align 4
%10 = load i32, ptr %3, align 4
%11 = icmp slt i32 %9, %10
;;;;; And a whole bunch more stuff that we don't need to understand yet! ;;;;;
ret void
}
; Function Attrs: nounwind willreturn memory(read)
declare i64 @strlen(ptr noundef) #1
;;;; and all of the attribute defns that we can also ignore
Now to use this in our program, we just need to call the function from main.
This only requires a small tweak to what we wrote before:
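;; copy-paste your helper function LLVM output up here:
;; the whole `define ... @shout(...) { ... }` function, plus its `declare i64 @strlen(...)` line.
;; Either bring the `attributes #N = ...` definitions along too, or delete the
;; trailing #0 / #1 / #2 references from the pasted code.
;; ...
;; ...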
;; and now here's our original program, with a one-line modification
;; String constants passed to printf/scanf; each [N x i8] length includes the trailing \00 byte
@.str = private unnamed_addr constant [17 x i8] c"Enter a string: \00"
@.str.1 = private unnamed_addr constant [7 x i8] c" %255s\00"
@.str.2 = private unnamed_addr constant [29 x i8] c"Here's what you entered: %s\0A\00"
;; main: prompt, read one word into a 256-byte heap buffer, uppercase it with @shout, print it, free it
define i32 @main() {
;; %reg1 is written with 0 but never read; %reg2 is the stack slot that holds the buffer pointer
%reg1 = alloca i32
%reg2 = alloca ptr
store i32 0, ptr %reg1
;; allocate the 256-byte buffer and remember its address in %reg2
%reg3 = call ptr @malloc(i64 256)
store ptr %reg3, ptr %reg2
;; print the prompt
%reg4 = call i32 (ptr, ...) @printf(ptr @.str)
;; read the input into the buffer
%reg5 = load ptr, ptr %reg2
%reg6 = call i32 (ptr, ...) @scanf(ptr @.str.1, ptr %reg5)
;; %reg7 (the buffer pointer) is passed to @shout and then reused as printf's argument
%reg7 = load ptr, ptr %reg2
call void @shout(ptr %reg7) ; <----- this is the new line we added
%reg8 = call i32 (ptr, ...) @printf(ptr @.str.2, ptr %reg7)
;; release the buffer and return 0
%reg9 = load ptr, ptr %reg2
call void @free(ptr %reg9)
ret i32 0
}
;; External functions called by main; they are only declared here, not defined in this file
declare ptr @malloc(i64)
declare i32 @printf(ptr, ...)
declare i32 @scanf(ptr, ...)
declare void @free(ptr)
Try it for yourself! We now have one complete source program in LLVM IR that does everything we need.
So in general, the steps are:
- Write your main function in LLVM directly
- Put as much of the “work” as you can into helper functions that you write in a separate .c file
- Compile the helper functions and copy/paste the resulting LLVM above your main
- Now your main can call those helper functions!