0% found this document useful (0 votes)
2 views25 pages

Lab2023 9 String Instructions

The document outlines a lab focused on string instructions in assembly language, detailing objectives, prerequisites, tools, and duration. It explains various x86 string instructions such as MOVS, LODS, STOS, CMPS, and SCAS, along with examples and tasks for practical application. The lab includes exercises for learners to implement string manipulation and comparison techniques using assembly language.

Uploaded by

20133027
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views25 pages

Lab2023 9 String Instructions

The document outlines a lab focused on string instructions in assembly language, detailing objectives, prerequisites, tools, and duration. It explains various x86 string instructions such as MOVS, LODS, STOS, CMPS, and SCAS, along with examples and tasks for practical application. The lab includes exercises for learners to implement string manipulation and comparison techniques using assembly language.

Uploaded by

20133027
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 25

Lab9: String Instructions

1. Objectives: after completing this lab, learner will be able to:


- Use string instructions

2. Prerequisites
- review the theory of assembly language

3. Tools
- A computer with the required software installed

4. Duration
- 4 hours

5. Contents:
5.1. Introduction
The x86 instruction set has five groups of string instructions (MOVS, LODS, STOS, CMPS, SCAS) for
processing arrays of bytes, words, doublewords and quadwords. Each instruction implicitly
uses the Source Index (RSI) register, the Destination Index (RDI) register, or both, to
address memory. References to the accumulator imply the use of AL, AX, EAX or RAX,
depending on the instruction data size. After each instruction executes, RSI and RDI are
incremented by the operand size if the direction flag is clear (DF = 0), and decremented if it
is set (DF = 1). String instructions execute efficiently because they automatically update the
array indexes. They can move data from memory to memory without requiring an intermediate
register. These instructions can take repeat prefixes (REP, REPE, REPNE, REPZ, REPNZ) to
perform a task repeatedly, without needing jump or loop instructions.

5.2. Repeat and String Instructions


MOVS moves data from the source string to the destination string.
CMPS compares data between the source and destination strings (in x86, comparison is
basically a subtraction which affects the EFLAGS register).

Pages - 1
Computer architecture labs
LODS loads data from the string pointed to by RSI into the accumulator (AL, AX, EAX or RAX).
STOS stores data from the accumulator into the string pointed to by RDI.
SCAS scans the data in the string pointed to by RDI and compares it to the accumulator (again,
affecting EFLAGS).
REP: Repeat until RCX equals 0.
REPE, REPZ: Repeat while the zero flag is set, i.e. until RCX equals 0 or the zero flag is cleared.
REPNE, REPNZ: Repeat while the zero flag is clear, i.e. until RCX equals 0 or the zero flag is set.
CLD: Clear direction flag. RSI and RDI incremented after each string instruction.
STD: Set the direction flag. RSI and RDI decremented after each string instruction.
6. Examples
6.1. Cmpstest1
Cmpstest.nasm
;;; cmpstest1 - compare two 4-byte strings with a single CMPSD.
;;; Exit status: 0 when the two doublewords are equal, 1 otherwise.
SYS_EXIT        equ     60

section .data
value1:         db      "Test"
value2:         db      "Test"

section .text
global _start
_start:
        nop
        cld                             ; string pointers move forward
        lea     rdi, [value2]
        lea     rsi, [value1]
        cmpsd                           ; flags = dword [rsi] - dword [rdi]
        mov     edi, 0                  ; status for "equal"; mov leaves the flags alone
        jz      .report
        mov     edi, 1                  ; status for "different"
.report:
        mov     eax, SYS_EXIT
        syscall
Pages - 2
Computer architecture labs
;;; *EOF*
✓ Compile and run program
cmpstest1: cmpstest1.nasm
nasm -f elf64 -g -F dwarf cmpstest1.nasm
ld -o cmpstest1 cmpstest1.o
✓ Using gdb to see result of each instruction
6.2. Cmpstest2
Cmpstest2.asm
;;; cmpstest2 - compare two 39-byte strings with REPE CMPSB.
;;; Exit status: 0 when every byte matches; otherwise the value left in RCX
;;; when the comparison stopped (the number of bytes not yet compared).
SYS_EXIT        equ     60

section .data
value1:         db      "This is a test of the CMPS instructions"
value2:         db      "This is a test of the CMPS Instructions"

section .text
global _start
_start:
        nop
        cld                             ; compare from the first byte upwards
        mov     ecx, 39                 ; number of bytes to compare
        lea     rsi, [value1]
        lea     rdi, [value2]
        repe cmpsb                      ; stops at the first mismatch or when rcx reaches 0
        jne     .mismatch
        xor     edi, edi                ; all bytes matched
        jmp     .leave
.mismatch:
        mov     rdi, rcx                ; report how many bytes were left
.leave:
        mov     eax, SYS_EXIT
        syscall

;;; *EOF*
✓ Compile and run
cmpstest2: cmpstest2.asm
nasm -f elf64 -g -F dwarf cmpstest2.asm
ld -o cmpstest2 cmpstest2.o
6.3. Convert example
;Convert.nasm
;;; Converting lower to upper case
;;;
;;; Walks string1 with LODSB/STOSB, rewriting every byte in 'a'..'z' to its
;;; upper-case form in place, then prints the string with printf and calls
;;; exit(0). Linked against libc (see the ld command line below the listing).
;;;
;;; Register roles in the loop:
;;;   rsi = read cursor, rdi = write cursor (same buffer), rcx = bytes left
section .data
string1:
        db      `This is a TEST, of the conversion program!\n`,0
;; number of bytes to convert: everything except the terminating NUL.
;; Computed by the assembler so the program keeps working if string1 changes.
STRING1_LEN     equ     $ - string1 - 1
length:
        dq      STRING1_LEN

section .text
global _start
extern exit,printf
_start:
        nop
        lea     rsi,[string1]
        mov     rdi,rsi
        mov     rcx,[length]
        jrcxz   end                     ; nothing to convert: do not let the counter wrap
        cld
loop1:
        lodsb                           ; al = next character
        cmp     al,'a'
        jb      skip                    ; below 'a' (unsigned): leave unchanged
        cmp     al,'z'
        ja      skip                    ; above 'z' (unsigned): leave unchanged
        sub     al,0x20                 ; 'a'..'z' -> 'A'..'Z'
skip:
        stosb                           ; write the (possibly converted) character back
        dec     rcx
        jnz     loop1
end:
        lea     rdi,[string1]           ; printf format argument
        xor     eax,eax                 ; al = 0: no vector registers used by this variadic call
        call    printf
        xor     edi,edi                 ; exit status 0
        call    exit

;;; *EOF*
✓ Compile and run program
convert: convert.nasm
nasm -f elf64 -g -F dwarf convert.nasm
ld --dynamic-linker /lib64/ld-linux-x86-64.so.2 -o
convert convert.o -lc
./convert
THIS IS A TEST, OF THE CONVERSION PROGRAM!
6.4. Movtest1
Movstest1.asm
;;; movstest1 - one MOVSB, one MOVSW and one MOVSD in sequence.
;;; Copies 1 + 2 + 4 bytes from value1 to output, then exits with status 0.
;;; No CLD is issued: the program relies on the direction flag being clear
;;; when the process starts.
SYS_EXIT        equ     60

section .data
value1          db      `This is a test string.\n`

section .bss
output          resb    23

section .text
global _start
_start:
        nop
        lea     rdi, [output]           ; destination cursor
        lea     rsi, [value1]           ; source cursor
        movsb                           ; 1 byte
        movsw                           ; 2 bytes
        movsd                           ; 4 bytes

        xor     edi, edi                ; exit status 0
        mov     eax, SYS_EXIT
        syscall

;;; *EOF*
✓ Compile and run program
movstest1: movstest1.asm
nasm -f elf64 -g -F dwarf movstest1.asm
ld -o movstest1 movstest1.o
6.5. Movtest 2
Movstest2.asm
;;; A second example of the MOVS instructions
;;; Runs MOVSB, MOVSW and MOVSD once each with the direction flag set, so
;;; RSI and RDI move downwards after every instruction. Both pointers start
;;; at the last byte of their 23-byte areas; step through it in gdb to see
;;; which bytes of output actually get written.
SYS_EXIT equ 60
section .data
value1 db `This is a test string.\n`
section .bss
output resb 23
section .text
global _start
_start:
nop
; point both registers at offset 22, the last byte of each area
lea rsi,[value1 + 22]
lea rdi,[output + 22]
; DF = 1: string instructions now decrement RSI/RDI
std
; copies the byte at offset 22; RSI/RDI drop to offset 21
movsb
; copies the two bytes at offsets 21-22; RSI/RDI drop to offset 19
movsw
; copies the four bytes at offsets 19-22; RSI/RDI drop to offset 15
movsd
; net effect: only the last four bytes of value1 reach output, and offsets
; 21-22 are copied more than once, because the pointers were not adjusted
; for the wider operand sizes before MOVSW/MOVSD

; exit with status 0
mov rax,SYS_EXIT
mov rdi,0
syscall

;;; *EOF*
✓ Compile and run program
movstest2: movstest2.asm
nasm -f elf64 -g -F dwarf movstest2.asm
ld -o movstest2 movstest2.o
6.6. Reptest1
Reptest1.asm
;;; reptest1 - copy a whole string with REP MOVSB.
;;; Copies the 23 bytes of value1 into output, then exits with status 0.
SYS_EXIT        equ     60

section .data
value1          db      `This is a test string.\n`

section .bss
output          resb    23

section .text
global _start
_start:
        nop
        cld                             ; copy upwards
        mov     ecx, 23                 ; byte count for REP
        lea     rdi, [output]
        lea     rsi, [value1]
        rep movsb                       ; repeat MOVSB until rcx reaches 0

        xor     edi, edi                ; exit status 0
        mov     eax, SYS_EXIT
        syscall

;;; *EOF*
✓ Compile and run program
reptest1: reptest1.asm
nasm -f elf64 -g -F dwarf reptest1.asm
ld -o reptest1 reptest1.o
6.7. Reptest2
Reptest2.asm
;;; An incorrect example of using the REP instruction
;;; Copies value1 to output in doubleword steps. The count of 6 doublewords
;;; is 24 bytes, one more than the 23 bytes of value1 and of output, which is
;;; the mistake this listing demonstrates.
SYS_EXIT equ 60
section .data
value1 db `This is a test string.\n`
; declared directly after value1, so its first byte is what the 24th byte
; read by the copy picks up
value2 db "Oops"
section .bss
output resb 23
section .text
global _start
_start:
nop
lea rsi,[value1]
lea rdi,[output]
; 6 iterations x 4 bytes = 24 bytes, but the string is only 23 bytes long
mov ecx,6
cld
; the final doubleword reads one byte past value1 and writes one byte past
; the 23 bytes reserved for output
rep movsd

; exit with status 0
mov rax,SYS_EXIT
mov rdi,0
syscall

;;; *EOF*
✓ Compile and run program
reptest2: reptest2.asm
Pages - 6
Computer architecture labs
nasm -g -f elf64 -F dwarf reptest2.asm
ld -o reptest2 reptest2.o

6.8. Scanstest1
Scanstest1.asm
;;; An example of the SCAS instruction
;;;
;;; Searches string1 for the single character stored in string2.
;;; Exit status: 1-based position of the first match, or 0 when the
;;; character does not occur (or length is 0).
SYS_EXIT        equ     60

section .data
string1         db      'This is a test - a long text string to scan.'
length          dd      44
string2         db      '-'

section .text
global _start
_start:
        nop
        cld                             ; scan upwards; set before any string instruction
        lea     rdi,[string1]           ; rdi = scan cursor
        lea     rsi,[string2]
        mov     ecx,[length]            ; rcx = bytes left to scan
        test    ecx,ecx
        jz      notfound                ; empty string: REPNE would not run and leave stale flags
        lodsb                           ; al = character to look for
        repne scasb                     ; stop on the first match or when rcx reaches 0
        jne     notfound
        mov     edi,[length]
        sub     edi,ecx                 ; position = length - bytes left (32-bit, no partial-register write)
        mov     eax,SYS_EXIT
        syscall                         ; exit(position)

notfound:
        mov     eax,SYS_EXIT
        xor     edi,edi
        syscall                         ; exit(0)

;;; *EOF*
✓ Compile and run program
scastest1: scastest1.asm
nasm -f elf64 -g -F dwarf scastest1.asm
ld -o scastest1 scastest1.o
6.9. Scanstest2
Scanstest2.asm
;;; An example of incorrectly using the SCAS instruction
SYS_EXIT equ 60
section .data
string1 db 'This is a test - a long text string to scan.'
length dd 11
string2 db 'test'
section .text
global _start
Pages - 7
Computer architecture labs
; Tries to find the 4-byte text in string2 inside string1 with REPNE SCASD.
; This is the "incorrect use" the listing demonstrates: SCASD advances RDI
; by 4 bytes per step, so only offsets 0, 4, 8, ... of string1 are compared.
; The text 'test' starts at offset 10 of string1, so it is never matched and
; the program ends in notfound with exit status 0.
_start:
nop
lea rdi,[string1]
lea rsi,[string2]
; length is 11: the count is in doublewords (11 x 4 = 44 bytes), not bytes
mov ecx,[length]
; eax = the four bytes of string2
lodsd
cld
; compare eax with successive doublewords at [rdi] until equal or rcx = 0
repne scasd
jne notfound
; found path: cx = length - remaining count, i.e. the 1-based doubleword index
sub cx,[length] ;!!!
neg cx
mov eax,SYS_EXIT
mov rdi,rcx
syscall

; no doubleword matched: exit with status 0
notfound:
mov eax,SYS_EXIT
mov rdi,0
syscall

;;; *EOF*
✓ Compile and run program
scastest2: scastest2.asm
nasm -f elf64 -g -F dwarf scastest2.asm
ld -o scastest2 scastest2.o
6.10. Storestest1
Storstest1.asm
;;; stostest1 - fill a buffer with one character using REP STOSB.
;;; Loads the byte at `space` into AL with LODSB, stores it into all 256
;;; bytes of `buffer`, then exits with status 0.
SYS_EXIT        equ     60

section .data
space           db      ' '

section .bss
buffer          resb    256

section .text
global _start
_start:
        nop
        cld                             ; fill upwards
        mov     ecx, 256                ; number of bytes to store
        lea     rdi, [buffer]
        lea     rsi, [space]
        lodsb                           ; al = fill character
        rep stosb                       ; repeat STOSB until rcx reaches 0

        xor     edi, edi                ; exit status 0
        mov     eax, SYS_EXIT
        syscall

;;; *EOF*
Pages - 8
Computer architecture labs
✓ Compile and run program
stostest1: stostest1.asm
nasm -f elf64 -g -F dwarf stostest1.asm
ld -o stostest1 stostest1.o
6.11. Strcmp
Strcmp.asm
;;; An example of comparing strings
;;;
;;; Compares string1 with string2 in dictionary order.
;;; Exit status: 0   - the strings are identical
;;;              1   - string1 comes after string2
;;;              255 - string1 comes before string2
;;; Bytes and lengths are compared as unsigned values. When one string is
;;; a prefix of the other, the shorter one comes first.
SYS_EXIT        equ     60

section .data
string1         db      "test"
length1         dd      4
string2         db      "test1"
length2         dd      5

section .text
global _start
_start:
        nop
        lea     rsi,[string1]
        lea     rdi,[string2]
        mov     ecx,[length1]
        mov     eax,[length2]
        cmp     ecx,eax
        cmova   ecx,eax                 ; ecx = min(length1, length2)
        test    ecx,ecx
        jz      equal                   ; no common bytes: decide on the lengths alone
        cld
        repe cmpsb                      ; stop at the first differing byte
        ja      greater                 ; unsigned: byte of string1 > byte of string2
        jb      less
        ; common prefix is identical: fall through and compare the lengths
equal:
        mov     ecx,[length1]
        cmp     ecx,[length2]
        ja      greater                 ; string1 is longer
        jb      less                    ; string1 is shorter
        mov     eax,SYS_EXIT
        xor     edi,edi
        syscall
less:
        mov     eax,SYS_EXIT
        mov     edi,255
        syscall
greater:
        mov     eax,SYS_EXIT
        mov     edi,1
        syscall

Pages - 9
Computer architecture labs
;;; *EOF*
✓ Compile and run program
strcomp: strcomp.asm
nasm -f elf64 -g -F dwarf strcomp.asm
ld -o strcomp strcomp.o
6.12. Stringsize
Strsize.asm
;;; strsize - length of a NUL-terminated string via REPNE SCASB.
;;; Scans at most MAX_SCAN bytes of string1 for a zero byte.
;;; Exit status: number of bytes before the zero byte, or 0 when no zero
;;; byte was found within the scan limit.
SYS_EXIT        equ     60
MAX_SCAN        equ     0xffff

section .data
string1         db      `Testing, one, two, three, testing.\n`,0

section .text
global _start
_start:
        nop
        cld                             ; scan upwards
        xor     eax, eax                ; al = 0, the byte to search for
        mov     ecx, MAX_SCAN
        lea     rdi, [string1]
        repne scasb                     ; stop at the zero byte or when rcx reaches 0
        mov     eax, SYS_EXIT           ; mov does not disturb the flags from SCASB
        jne     .no_terminator
        mov     edi, MAX_SCAN - 1       ; bytes scanned = MAX_SCAN - rcx; minus 1 for the zero byte
        sub     edi, ecx
        syscall

.no_terminator:
        xor     edi, edi
        syscall

;;; *EOF*

✓ Compile and run program


strsize: strsize.asm
nasm -f elf64 -g -F dwarf strsize.asm
ld -o strsize strsize.o

6.13. Example 1
Following program copies the string a to string b using movsb instruction

Pages - 10
Computer architecture labs
Question 1: What are the value of RSI and RDI at start of the program?
Answer:
Question 2: Show contents of memory (address+data) of string b at start of the program.
Answer:
Question 3: What are the value of RSI and RDI after line 14?
Answer:
Question 4: What are the value of RSI and RDI after line 18?
Answer:
Question 5: Show contents of memory (address+data) of string b after line 18 is executed?
Answer:
6.14. Example 2
Change the above program to copy the string using repeat instruction

Pages - 11
Computer architecture labs
Question 6: Repeat the above program by defining the string b as uninitialized data. Why is the
value of RDI different in this case?

Pages - 12
Computer architecture labs
6.15. Example 3
Copy string backwards by setting the direction flag.

6.16. Example 4
Question 7: Why add 4 to RSI and RDI at lines 15 and 17?
Answer:
Use scasb to calculate the length of a string. The end of the string is marked by $, which is
placed in the AL register.

Pages - 13
Computer architecture labs
Question 8: What is the value of RCX before execution of line 25?
Answer:
Question 9: If string is replaced with “Paki$tan” what would be the value of RCX after
execution of line 25?
Answer:
Question 10: The following example scans the string w. Note the value of AL register after each
iteration of lodsb instruction.

Pages - 14
Computer architecture labs
6.17. Example 5
Answer:
Example: Use stosb string instruction to populate an array.

Pages - 15
Computer architecture labs
Pages - 16
Computer architecture labs
7. Lab tasks

1 Lab Tasks:
Task1: Write a program which defines strings a = “BANANA” in data section, and display the
count of number of times the letter “A” appears in the string. Program should work if run
with another string.
Task 2: A character string STRING1 comes before another string STRING2 in dictionary if
A. the first character of STRING1 comes before the first character of STRING2
B. the first N-1 characters of the strings are identical, but the Nth character of STRING1 comes
before the Nth character of STRING2, or
C. STRING1 matches the beginning of STRING2 but STRING2 is longer.

Pages - 17
Computer architecture labs
You are to write a program which defines two-character strings a and b in data section, and
decides which string comes first alphabetically or if the strings are identical.

8. Reverse string
Of course when we talk about assembly programming language we can’t talk about
string data type, actually we’re dealing with array of bytes. Let’s try to write simple
example, we will define string data and try to reverse and write result to stdout. This
tasks seems pretty simple and popular when we start to learn new programming
language. Let’s look on implementation.
First of all, I define initialized data. It will be placed in data section (You can read about
sections in part):

section .data
SYS_WRITE equ 1
STD_OUT equ 1
SYS_EXIT equ 60
EXIT_CODE equ 0

NEW_LINE db 0xa
;; NUL-terminated: calculateStrLength stops at the first zero byte
INPUT db "Hello world!", 0

Here we can see four constants:

• SYS_WRITE - ‘write’ syscall number


• STD_OUT - stdout file descriptor
• SYS_EXIT - ‘exit’ syscall number
• EXIT_CODE - exit code

syscall list you can find - here. Also there defined:

• NEW_LINE - new line (\n) symbol


• INPUT - our input string, which we will reverse

Next we define bss section for our buffer, where we will put reversed string:

section .bss
OUTPUT resb 12

Ok we have some data and buffer where to put result, now we can define text section for
code. Let’s start from main _start routine:

_start:
mov rsi, INPUT
xor rcx, rcx
Pages - 18
Computer architecture labs
cld
mov rdi, $ + 15
call calculateStrLength
xor rax, rax
xor rdi, rdi
jmp reverseStr
Here are some new things. Let’s see how it works. First of all we put the address of INPUT into
the rsi register at line 2, as we did for writing to stdout, and write zeros to the rcx register,
which will be the counter for calculating the length of our string. At line 4 we can see the cld
instruction. It resets the df flag to zero. We need it because, when we calculate the length of
the string, we go through its symbols, and with the df flag at 0 we handle the symbols from left
to right. Next we call the calculateStrLength function. I skipped line 5 with the
mov rdi, $ + 15 instruction; I will explain it a little later. Now let’s look at the
calculateStrLength implementation:

calculateStrLength:
;; check is it end of string
cmp byte [rsi], 0
;; if yes exit from function
je exitFromRoutine
;; load byte from rsi to al and inc rsi
lodsb
;; push symbol to stack
push rax
;; increase counter
inc rcx
;; loop again
jmp calculateStrLength

As you can tell from its name, it calculates the length of the INPUT string and stores the result
in the rcx register. First of all we check whether rsi points to a zero byte; if so, this is the
end of the string and we can exit from the function. Next is the lodsb instruction. It is simple:
it loads 1 byte from the address in rsi into the al register (the low 8 bits of rax) and advances
the rsi pointer. As we executed the cld instruction, lodsb moves rsi one byte from left to right
every time, so we walk through the symbols of the string. After that we push the value of rax onto
the stack; it now contains a symbol from our string (lodsb puts the byte at [rsi] into al, and al
is the low 8 bits of rax). Why do we push the symbol onto the stack? Remember how the stack works:
it follows the LIFO principle (last in, first out). That is very convenient for us. We take the
first symbol from [rsi] and push it onto the stack, then the second, and so on, so the last symbol
of the string ends up on top of the stack. Then we just pop symbol by symbol from the stack and
write to the OUTPUT buffer. After the push we increment our counter (rcx) and loop again to the
start of the routine.

Ok, we pushed all symbols from string to stack, now we can jump to exitFromRoutine
return to _start there. How to do it? We have ret instruction for this. But if code will be like
this:

Pages - 19
Computer architecture labs
exitFromRoutine:
;; return to _start
ret

It will not work. Why? It is tricky. Remember that we called calculateStrLength from _start. What
happens when we call a function? First, any arguments that are passed on the stack are pushed
(from right to left). After that the return address is pushed onto the stack, so the function
knows where to return when it finishes. But look at calculateStrLength: we pushed the symbols of
our string onto the stack, so the return address is no longer on top of the stack and ret would
not return to the right place. What can we do about it? Now we must take a look at the strange
instruction before the call:

mov rdi, $ + 15

First all:

• $ - evaluates to the address of the beginning of the line in which $ is used


• $$ - returns position in memory of current section start

So we have position of mov rdi, $ + 15, but why we add 15 here? Look, we need to know
position of next line after calculateStrLength. Let’s open our file with objdump util:

objdump -D reverse

reverse: file format elf64-x86-64

Disassembly of section .text:

00000000004000b0 <_start>:
4000b0: 48 be 41 01 60 00 00 movabs $0x600141,%rsi
4000b7: 00 00 00
4000ba: 48 31 c9 xor %rcx,%rcx
4000bd: fc cld
4000be: 48 bf cd 00 40 00 00 movabs $0x4000cd,%rdi
4000c5: 00 00 00
4000c8: e8 08 00 00 00 callq 4000d5
<calculateStrLength>
4000cd: 48 31 c0 xor %rax,%rax
4000d0: 48 31 ff xor %rdi,%rdi
4000d3: eb 0e jmp 4000e3 <reverseStr>

We can see here that the mov rdi, $ + 15 instruction (at 4000be) takes 10 bytes and the call that
follows it (at 4000c8) takes 5 bytes, 15 bytes in total. So $ + 15 is the address of the
instruction right after the call (4000cd), which is exactly the return address we need. Now we
can push the return address from rdi onto the stack and return from the function:

exitFromRoutine:

Pages - 20
Computer architecture labs
;; push return addres to stack again
push rdi
;; return to _start
ret

Now we return to start. After call of the calculateStrLength we write zeros to rax and
rdi and jump to reverseStr label. It’s implementation is following:

reverseStr:
    ;; counter is zero: every symbol has been written, print the buffer
    test rcx, rcx
    jz printResult
    ;; take the next symbol (last pushed first) from the stack
    pop rax
    ;; store only the symbol itself (al): an 8-byte store of rax would
    ;; write 7 bytes past the current position and overrun OUTPUT
    mov [OUTPUT + rdi], al
    dec rcx
    inc rdi
    jmp reverseStr

Here we check our counter, which holds the length of the string; if it is zero, all symbols have
been written to the buffer and we can print it. Otherwise we pop the next symbol from the stack
into the rax register and write it to the OUTPUT buffer. We index the buffer with rdi because
otherwise every symbol would be written to the first byte of the buffer. After this we decrement
the length counter, increment rdi to move to the next position in the OUTPUT buffer, and jump
back to the start of the label.

After execution of reverseStr we have reversed string in OUTPUT buffer and can write result
to stdout with new line:

printResult:
mov rdx, rdi
mov rax, 1
mov rdi, 1
mov rsi, OUTPUT
syscall
jmp printNewLine

printNewLine:
mov rax, SYS_WRITE
mov rdi, STD_OUT
mov rsi, NEW_LINE
mov rdx, 1
syscall
jmp exit

and exit from the our program:

exit:
mov rax, SYS_EXIT
Pages - 21
Computer architecture labs
mov rdi, EXIT_CODE
syscall

That’s all, now we can compile our program with:

all:
nasm -g -f elf64 -o reverse.o reverse.asm
ld -o reverse reverse.o

clean:
rm reverse reverse.o

and run it:

1.1 String operations

Of course there are many other instructions for string/bytes manipulations:

• REP - repeat while rcx is not zero


• MOVSB - copy a string of bytes (MOVSW, MOVSD and etc..)
• CMPSB - byte string comparison
• SCASB - byte string scanning
• STOSB - write byte to string

9. Full program reverse.asm


;;
;; reverse.asm - write INPUT reversed to stdout, followed by a new line.
;;
;; Target: x86-64 Linux, NASM syntax, raw syscalls, entry point _start.
;;
;; How it works: calculateStrLength pushes every symbol of INPUT onto the
;; stack and counts them in rcx; reverseStr pops them back (last pushed
;; first) into OUTPUT; printResult / printNewLine write the result.
;;

;;
;; initialized data
;;
section .data
        SYS_WRITE       equ 1
        STD_OUT         equ 1
        SYS_EXIT        equ 60
        EXIT_CODE       equ 0

        NEW_LINE        db 0xa
        ;; NUL-terminated: calculateStrLength stops at the first zero byte
        INPUT           db "Hello world!", 0
        ;; number of symbols in INPUT, without the terminating zero byte
        INPUT_LEN       equ $ - INPUT - 1

;;
;; non initialized data
;;
section .bss
        ;; one byte per symbol of INPUT
        OUTPUT          resb INPUT_LEN

;;
;; code
;;
section .text
        global _start

;;
;; main routine
;;
_start:
        ;; get address of INPUT
        mov     rsi, INPUT
        ;; zero rcx, the symbol counter
        xor     rcx, rcx
        ;; df = 0, rsi is incremented by lodsb
        cld
        ;; remember the address of the instruction after the call:
        ;; this mov is 10 bytes long and the call is 5, hence $ + 15.
        ;; Keep these two instructions adjacent and unchanged.
        mov     rdi, $ + 15
        ;; push all symbols, rcx = string length
        call    calculateStrLength
        ;; write zeros to rax
        xor     rax, rax
        ;; rdi = index into OUTPUT for reverseStr
        xor     rdi, rdi
        ;; reverse string
        jmp     reverseStr

;;
;; calculate length of string
;;
;; In:    rsi = address of a NUL-terminated string
;;        rcx = 0
;;        rdi = address to return to
;; Out:   rcx = number of symbols; each symbol pushed on the stack
;;        (one 8-byte slot per symbol, symbol in the low byte)
;;
calculateStrLength:
        ;; check is it end of string
        cmp     byte [rsi], 0
        ;; if yes exit from function
        je      exitFromRoutine
        ;; load byte from [rsi] to al and increment rsi
        lodsb
        ;; push symbol to stack
        push    rax
        ;; increase counter
        inc     rcx
        ;; loop again
        jmp     calculateStrLength

;;
;; back to _start
;;
;; The return address pushed by call is buried under the symbols, so the
;; copy saved in rdi is pushed on top and consumed by ret. The buried
;; original stays on the stack; the program exits without needing it.
;;
exitFromRoutine:
        ;; push return address to stack again
        push    rdi
        ;; return to _start
        ret

;;
;; reverse string
;;
;; In:    rcx = symbols left on the stack, rdi = next index in OUTPUT
;;
reverseStr:
        ;; all symbols written?
        test    rcx, rcx
        ;; if yes print result string
        jz      printResult
        ;; get symbol from stack
        pop     rax
        ;; write only the symbol byte to the output buffer; an 8-byte
        ;; store of rax would run past the end of OUTPUT
        mov     [OUTPUT + rdi], al
        ;; decrease length counter
        dec     rcx
        ;; increase output index (also the length for the write syscall)
        inc     rdi
        ;; loop again
        jmp     reverseStr

;;
;; Print result string
;;
printResult:
        ;; rdx = number of bytes written to OUTPUT
        mov     rdx, rdi
        mov     rax, SYS_WRITE
        mov     rdi, STD_OUT
        mov     rsi, OUTPUT
        syscall
        jmp     printNewLine

;;
;; Print new line
;;
printNewLine:
        mov     rax, SYS_WRITE
        mov     rdi, STD_OUT
        mov     rsi, NEW_LINE
        mov     rdx, 1
        syscall
        jmp     exit

;;
;; Exit from program
;;
exit:
        ;; syscall number
        mov     rax, SYS_EXIT
        ;; exit code
        mov     rdi, EXIT_CODE
        ;; call sys_exit
        syscall
Compile
all:
nasm -g -f elf64 -o reverse.o reverse.asm
ld -o reverse reverse.o

clean:
rm reverse reverse.o

10.

Pages - 25
Computer architecture labs

You might also like