Socket Programming Basics

Download as ppt, pdf, or txt
Download as ppt, pdf, or txt
You are on page 1of 69

Sockets Part 2

Generic

IP Specific

/*
 * Generic socket address: the type the socket functions take by pointer.
 * Only the family is interpreted generically; the rest is an opaque blob
 * whose meaning depends on the address family.
 */
struct sockaddr
{
    unsigned short sa_family;   /* Address family (e.g., AF_INET) */
    char sa_data[14];           /* Protocol-specific address information */
};

/*
 * IPv4 address. Defined before struct sockaddr_in because that structure
 * embeds it by value and therefore needs the complete type.
 */
struct in_addr
{
    /* Internet address (32 bits). unsigned int rather than unsigned long:
       long is 64 bits wide on LP64 systems, which would break the 4-byte
       layout shown for this field. */
    unsigned int s_addr;
};

/*
 * IPv4-specific socket address. Same total size (16 bytes) as
 * struct sockaddr, so a pointer to it can be cast to struct sockaddr *.
 */
struct sockaddr_in
{
    unsigned short sin_family;  /* Internet protocol (AF_INET) */
    unsigned short sin_port;    /* Port (16 bits) */
    struct in_addr sin_addr;    /* Internet address (32 bits) */
    char sin_zero[8];           /* Not used */
};

sockaddr

sockaddr_in

/* Address family (e.g., AF_INET) */


/* Protocol-specific address information */

/* Internet protocol (AF_INET) */


/* Port (16-bits) */
/* Internet address (32-bits) */
/* Not used */

/* Internet address (32-bits) */

Family

Blob

2 bytes

2 bytes

4 bytes

Family

Port

Internet address

8 bytes
Not used

A socket address structure is always passed by reference when


passed as an argument to any socket functions. But any socket
function that takes one of these pointers as an argument must
deal with socket address structures from any of the supported
protocol families.

The IPv6 socket address is defined by including the <netinet/in.h>


header

The SIN6_LEN constant must be defined if the system supports the


length member for socket address structures.
The IPv6 family is AF_INET6, whereas the IPv4 family is AF_INET.
The members in this structure are ordered so that if the
sockaddr_in6 structure is 64-bit aligned, so is the 128-bit sin6_addr
member. On some 64-bit processors, data accesses of 64-bit values
are optimized if stored on a 64-bit boundary.
The sin6_flowinfo member is divided into two fields:
The low-order 20 bits are the flow label
The high-order 12 bits are reserved
The sin6_scope_id identifies the scope zone in which a scoped
address is meaningful, most commonly an interface index for a link-local address

A new generic socket address structure was defined as part of the IPv6
sockets API, to overcome some of the shortcomings of the existing struct
sockaddr. Unlike the struct sockaddr, the new struct sockaddr_storage is
large enough to hold any socket address type supported by the system.
The sockaddr_storage structure is defined by including the <netinet/in.h>
header

The sockaddr_storage type provides a generic socket address


structure that is different from struct sockaddr in two ways:
a.If any socket address structures that the system supports have
alignment requirements, the sockaddr_storage provides the
strictest alignment requirement.
b.The sockaddr_storage is large enough to contain any socket
address structure that the system supports.

Network Programming - issues

Byte Ordering
Naming
Addressing

11

Byte Ordering of Integers


memory
address A +1

memory
address A

Stored at little-endian
computer

high-order byte

Integer representation (2
byte)

D3

F2

Stored at big-endian
computer

low-order byte

high-order byte

low-order byte

Different CPU architectures have different byte


ordering
12

Byte Ordering Problem

Question: What would happen if two computers with


different integer byte ordering communicate?

Answer:

Nothing if they do not exchange integers!


But: If they exchange integers, they would get the

Message in Memory of
little-endian Computer

Processing

Message is:
[Hello,1]

48 45 4C 4C 6F 01
00

Example:

Message is sent
across Network

Message is:
[Hello,512]

Processing

wrong order of bytes, therefore, the wrong value!

Message in Memory of
big-endian Computer

48 45 4C 4C 6F 01
00
13

Byte Ordering Solution

There are two solutions if computers with different byte


ordering system want to communicate

They must know the kind of architecture of the sending


computer
(bad solution, it has not been implemented)
Introduction of a network byte order (big-endian). The functions
are:

uint16_t htons(uint16_t
host16bitvalue)
uint32_t htonl(uint32_t host32bitvalue)
uint16_t ntohs(uint16_t net16bitvalue)
uint32_t ntohl(uint32_t net32bitvalue)

Note: use for all integers (short and long), which


are sent across the network

Including port numbers and IP addresses

14

Network Byte Order


#include <netinet/in.h>
uint16_t htons(uint16_t host16bitvalue);
uint32_t htonl(uint32_t host32bitvalue);
uint16_t ntohs(uint16_t net16bitvalue);
uint32_t ntohl(uint32_t net32bitvalue);

/*
 * Map the four byte-order conversion helpers onto the host's endianness.
 * Network byte order is big-endian, so a big-endian host needs no work and
 * a little-endian host must swap bytes. Relies on __BYTE_ORDER,
 * __BIG_ENDIAN, __LITTLE_ENDIAN and the __bswap_16/__bswap_32 helpers
 * being provided by the C library's headers.
 */
# if __BYTE_ORDER == __BIG_ENDIAN
/* The host byte order is the same as network byte order,
   so these functions are all just identity. */
# define ntohl(x) (x)
# define ntohs(x) (x)
# define htonl(x) (x)
# define htons(x) (x)
# else
#  if __BYTE_ORDER == __LITTLE_ENDIAN
/* Each directive must sit on one line: a lone '#' is a null directive and
   would leave the rest behind as stray tokens. */
#   define ntohl(x) __bswap_32 (x)
#   define ntohs(x) __bswap_16 (x)
#   define htonl(x) __bswap_32 (x)
#   define htons(x) __bswap_16 (x)
#  endif
# endif

Program to determine host byte order

Byte Manipulation Functions


2 groups of functions operate on multibyte fields
can't rely on C convention of null-terminated string, since
data can contain zeros (e.g IP addresses)
operate on multibyte fields, without interpreting the
data, and without assuming that the data is a null-terminated C string
need these types of functions when dealing with
socket address structures because we need to
manipulate fields such as IP addresses

Byte Manipulation functions


#include <strings.h>
void bzero(void *dest, size_t nbytes); /* zeros nbytes bytes starting at dest */
void bcopy(const void *src, void *dest, size_t
nbytes);
int bcmp(const void *ptr1, const void *ptr2,
size_t nbytes);
Returns: 0 if equal, nonzero if unequal
#include <string.h>
void* memset(void *dest, int c, size_t len);
void* memcpy(void *dest, const void *src, size_t
nbytes);
int memcmp(const void *ptr1, const void *ptr2,
size_t nbytes);
Returns: 0 if equal, <0 or >0 if unequal

Naming and Addressing

Host name
identifies a single host (Domain Name System)
variable length string (e.g. www.berkeley.edu)
is mapped to one or more IP addresses

IP Address
written as dotted octets (e.g. 10.0.0.1)
32 bits. Not a number! But it often needs to be
converted to a 32-bit integer before it can be used.

Port number
identifies a process on a host
16 bit number

19

inet_aton, inet_addr and


inet_ntoa

Convert internet addresses between


ASCII strings and network byte ordered
binary values

inet_aton, inet_ntoa and inet_addr convert


an Ipv4 address from a dotted-decimal
string (e.g. 206.168.112.96) to its 32-bit
network byte ordered binary value
Newer functions inet_pton and inet_ntop
handle both IPv4 and IPv6 addresses.

IPv4 Address Conversion Functions

#include <arpa/inet.h>
int inet_aton(const char *strptr, struct in_addr *addrptr);
Returns: 1 if string was valid, 0 on error
in_addr_t inet_addr(const char *strptr);
Returns: 32-bit binary network byte ordered IPv4 address;
INADDR_NONE if error
Problem: the valid address 255.255.255.255 has the same 32-bit value as INADDR_NONE, so inet_addr cannot tell it apart from an error
char *inet_ntoa(struct in_addr inaddr);
Returns: pointer to dotted-decimal string

IPv4 & IPv6 Address Conversion Functions

inet_pton and inet_ntop Functions


#include <arpa/inet.h>
int inet_pton(int family, const char *strptr, void *addrptr);
Returns: 1 if OK, 0 if input not a valid presentation format, -1 on
error
const char *inet_ntop(int family, const void *addrptr, char
*strptr, size_t len);
Returns: pointer to result if OK, NULL on error
If family is not supported, both functions return an error with errno set to
EAFNOSUPPORT

To help specify this size, the following two definitions are defined by including the
<netinet/in.h> header:
#define INET_ADDRSTRLEN 16 /* for IPv4 dotted-decimal */
#define INET6_ADDRSTRLEN 46 /* for IPv6 hex string */

If len is too small to hold the resulting presentation format, including the terminating null character,
a null pointer is returned and errno is set to ENOSPC.

Summary of address conversion functions

Sock_ntop Function

A basic problem with inet_ntop is that it requires the caller to pass a pointer to a binary address.
This requires the caller to know the format of the structure and the address family.

sock_ntop takes a pointer to a socket address structure, looks inside the structure,
and calls the appropriate function to return the presentation format of the address.

#include "unp.h"
char *sock_ntop(const struct sockaddr *sockaddr, socklen_t
addrlen);
Returns: non-null pointer if OK, NULL on error
sockaddr points to a socket address structure whose length is
addrlen. The function uses its own static buffer to hold the result
and a pointer to this buffer is the return value.

readn, writen, and readline Functions

#include "unp.h"
ssize_t readn(int filedes, void *buff, size_t nbytes);
ssize_t writen(int filedes, const void *buff, size_t nbytes);
ssize_t readline(int filedes, void *buff, size_t maxlen);
All return: number of bytes read or written, -1 on error

Other useful functions

gethostname(char *name, int len): gets the name of the


current host
gethostbyaddr(char *addr, int len, int type): looks up the host
entry (struct hostent, including the host name) for a binary IP address
gethostbyname(const char *name);
getaddrinfo() & getnameinfo() work with IPv6 as well
better to use these
int getsockname(int socket, struct sockaddr *restrict
address, socklen_t *restrict address_len);
Warning: check function assumptions about byte-ordering
(host or network). Often, they assume parameters / return
solutions in network byte-order

26

Issues in Client/Server
Programming

27

Issues in Client Programming

Identifying the Server.


Looking up a IP address.
Looking up a well known port name.
Specifying a local IP address.
UDP client design.
TCP client design.
28

Identifying the Server

Options:

hard-coded into the client program.


require that the user identify the server.
read from a configuration file.
use a separate protocol/network service to
lookup the identity of the server.

29

Identifying a TCP/IP server.

Need an IP address, protocol and port.

We often use host names instead of IP


addresses.
usually the protocol (UDP vs. TCP) is not
specified by the user.
often the port is not specified by the user.
Can you name one common exception ?

30

Services and Ports

Many services are available via well


known addresses (names).
There is a mapping of service names to
port numbers:
struct servent *getservbyname(const char *service,
const char *protocol);

servent->s_port is the port number in


network byte order.
31

Specifying a Local Address

When a client creates and binds a


socket it must specify a local port and
IP address.
Typically a client doesn't care what port
it is on:
haddr->sin_port = htons(0);
give me any available port !

32

Local IP address

A client can also ask the operating system


to take care of specifying the local IP
address:
haddr->sin_addr.s_addr=
htonl(INADDR_ANY);
Give me the appropriate address

33

UDP Client Design

Establish server address (IP and port).


Allocate a socket.
Specify that any valid local port and IP
address can be used.
Communicate with server (send, recv)
Close the socket.

34

Connected mode UDP

A UDP client can call connect() to


establish the address of the server.
The UDP client can then use read() and
write() or send() and recv().
A UDP client using a connected mode
socket can only talk to one server
(using the connected-mode socket).
35

TCP Client Design

Establish server address (IP and port).


Allocate a socket.
Specify that any valid local port and IP
address can be used.
Call connect()
Communicate with server (read,write).
Close the connection.
36

Closing a TCP socket

Many TCP based application protocols


support multiple requests and/or
variable length requests over a single
TCP connection.
How does the server know when the
client is done (and it is OK to close the
socket) ?
37

Partial Close

One solution is for the client to shut


down only its writing end of the socket.
The shutdown() system call provides
this function.

shutdown(int s, int direction);
direction can be 0 (SHUT_RD) to close the reading end
or 1 (SHUT_WR) to close the writing end.
shutdown sends info to the other process!
38

TCP sockets programming

Common problem areas:

null termination of strings.


reads don't correspond to writes.
synchronization (including close()).
ambiguous protocol.

39

TCP Reads

Each call to read() on a TCP socket


returns any available data (up to a
maximum).
TCP buffers data at both ends of the
connection.
You must be prepared to accept data 1
byte at a time from a TCP socket!
40

Readn, writen and readline


functions

Stream sockets (e.g. TCP sockets)


exhibit a behavior with the read and
write functions that differs from normal
file I/O
A read or write on a stream socket might
input or output fewer bytes than
requested

But this is not an error condition

All that is required is for caller to invoke


the read or write function again

Server Design
Iterative
Connectionless

Iterative
Connection-Oriented

Concurrent
Connectionless

Concurrent
Connection-Oriented

42

Concurrent vs. Iterative

An iterative server handles a single


client request at one time.
A concurrent server can handle multiple
client requests at one time.

43

Concurrent vs. Iterative

Concurrent
Large or variable size requests
Harder to program
Typically uses more system resources
Iterative
Small, fixed size requests
Easy to program

44

Connectionless vs.
Connection-Oriented

Connection-Oriented
EASY TO PROGRAM
transport protocol handles the tough
stuff.
requires separate socket for each
connection.
Connectionless

less overhead
no limitation on number of clients

45

Connectionless Iterative
Uses connectionless protocol: UDP.
server

Server processes one request at a time.


Server uses one single port [well-known port]

Connection-oriented concurrent server

Uses connection-oriented: TCP


At first connection is established.
Uses one well-known port and many ephemeral ports

Server issues passive-open at well-known port.


Client initially approaches this port.
After connection is made, server assigns a temporary port to free
the well-known port.
Data transfer via ephemeral port.

Concurrent Server
Design Alternatives

One child per client

Spawn one thread per client


Preforking multiple processes
Prethreaded Server

49

One child per client

Traditional Unix server:

TCP: after call to accept(), call fork().


UDP: after recvfrom(), call fork().
Each process needs only a few sockets.
Small requests can be serviced in a small
amount of time.

Parent process needs to clean up after


children!!!! (call wait() ).
50

Fork and exec functions

Need to create a new child process to


handle each incoming client
request/transaction
fork function is the only way in Unix to
create a new process:
#include <unistd.h>
pid_t fork(void);

Returns: 0 in child, process ID of child in parent, -1 on error


Called once but returns TWICE

Once in the parent process (returns child process id),

and once in the child process (return of 0)

More Forking

All descriptors open in the parent before


the call to fork() are shared with the
child after fork returns.

Including the connected socket file
descriptor returned by accept

Example

One thread per client

Almost like using fork() - just call


pthread_create instead.
Using threads makes it easier (less
overhead) to have sibling threads
share information.
Sharing information must be done
carefully (use pthread_mutex)
55

Prefork()d Server

Creating a new process for each client


is expensive.
We can create a bunch of processes,
each of which can take care of a client.
Each child process is an iterative
server.

56

Prefork()d TCP Server

Initial process creates socket and binds


to well known address.
Process now calls fork() a bunch of
times.
All children call accept().
The next incoming connection will be
handed to one child.
57

Preforking

Having too many preforked children can


be bad.
Using dynamic process allocation
instead of a hard-coded number of
children can avoid problems.
The parent process just manages the
children, doesn't worry about clients.
58

Sockets library vs. system call

A preforked TCP server won't usually


work the way we want if sockets is not
part of the kernel:

calling accept() is a library call, not an


atomic operation.

We can get around this by making sure


only one child calls accept() at a time
using some locking scheme.
59

Prethreaded Server

Same benefits as preforking.


Can also have the main thread do all
the calls to accept() and hand off each
client to an existing thread.

60

What's the best server design


for my application?

Many factors:

expected number of simultaneous clients.


Transaction size (time to compute or
lookup the answer)
Variability in transaction size.
Available system resources (perhaps what
resources can be required in order to run
the service).
61

Statelessness

State: Information that a server


maintains about the status of ongoing
client interactions.
Connectionless servers that keep state
information must be designed carefully!
Messages can be duplicated!

62

The Dangers of Statefulness

Clients can go down at any time.


Client hosts can reboot many times.
The network can lose messages.
The network can duplicate messages.

63

Iterative echo server

/**** iserver.c ****/


#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#define SERVER_PORT 12345

/* Run with a number of incoming connection as argument */


/*
 * send_all - write exactly len bytes of buf to the connected socket sd.
 *
 * send() on a stream socket may accept fewer bytes than requested, so it is
 * called repeatedly until the whole buffer has been handed over.
 *
 * Returns 0 on success, -1 as soon as a send() call fails to make progress.
 */
static int send_all(int sd, const char *buf, size_t len)
{
    size_t sent = 0;

    while (sent < len)
    {
        ssize_t n = send(sd, buf + sent, len - sent, 0);
        if (n <= 0)
            return -1;
        sent += (size_t)n;
    }
    return 0;
}

/*
 * Iterative echo server.
 *
 * Listens on SERVER_PORT on every local address and serves clients one at a
 * time: accept a connection, receive one message, echo it back, close.
 *
 * argv[1] (optional): number of connections to serve before exiting; must be
 * a non-negative decimal integer. When absent, a usage hint is printed and
 * one connection is served.
 *
 * Returns 0 after serving the requested number of connections; exits with
 * status -1 on an invalid argument or on any socket error.
 */
int main(int argc, char *argv[])
{
    int i, rc;
    int num = 1;
    int listen_sd, accept_sd;
    ssize_t nread;
    /* Buffer for data; one byte is reserved for the terminating NUL */
    char buffer[100];
    struct sockaddr_in addr;

    /* If an argument was specified, use it to */
    /* control the number of incoming connections */
    if(argc >= 2)
    {
        char *end;
        long requested;

        /* strtol instead of atoi so that garbage and overflow are detected */
        errno = 0;
        requested = strtol(argv[1], &end, 10);
        if(errno != 0 || end == argv[1] || *end != '\0' ||
           requested < 0 || requested > INT_MAX)
        {
            fprintf(stderr, "Iserver - invalid number of connections: \"%s\"\n", argv[1]);
            exit(-1);
        }
        num = (int)requested;
    }
    /* Prompt some message */
    else
    {
        printf("Usage: %s <The_number_of_client_connection else 1 will be used>\n", argv[0]);
        num = 1;
    }

    /* Create an AF_INET stream socket to receive */
    /* incoming connections on */
    listen_sd = socket(AF_INET, SOCK_STREAM, 0);
    if(listen_sd < 0)
    {
        perror("Iserver - socket() error");
        exit(-1);
    }
    printf("Iserver - socket() is OK\n");

    printf("Binding the socket...\n");
    /* Bind the socket to SERVER_PORT on any local address */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr.sin_port = htons(SERVER_PORT);
    rc = bind(listen_sd, (struct sockaddr *)&addr, sizeof(addr));
    if(rc < 0)
    {
        perror("Iserver - bind() error");
        close(listen_sd);
        exit(-1);
    }
    printf("Iserver - bind() is OK\n");

    /* Set the listen backlog */
    rc = listen(listen_sd, 5);
    if(rc < 0)
    {
        perror("Iserver - listen() error");
        close(listen_sd);
        exit(-1);
    }
    printf("Iserver - listen() is OK\n");

    /* Inform the user that the server is ready */
    printf("The Iserver is ready!\n");

    /* Go through the loop once for each connection */
    for(i = 0; i < num; i++)
    {
        /* Wait for an incoming connection */
        printf("Iteration: #%d\n", i+1);
        printf(" waiting on accept()\n");
        accept_sd = accept(listen_sd, NULL, NULL);
        if(accept_sd < 0)
        {
            perror("Iserver - accept() error");
            close(listen_sd);
            exit(-1);
        }
        printf("accept() is OK and completed successfully!\n");

        /* Receive a message from the client */
        printf("I am waiting client(s) to send message(s) to me...\n");
        nread = recv(accept_sd, buffer, sizeof(buffer) - 1, 0);
        if(nread < 0)
        {
            perror("Iserver - recv() error");
            close(listen_sd);
            close(accept_sd);
            exit(-1);
        }
        if(nread == 0)
        {
            /* Orderly shutdown by the peer is not an error and errno is not
               set, so report it plainly and move on to the next client. */
            printf("Iserver - client closed the connection without sending data\n");
            close(accept_sd);
            continue;
        }

        /* recv() does not NUL-terminate; do it before treating the data as a string */
        buffer[nread] = '\0';
        printf("The message from client: \"%s\"\n", buffer);

        /* Echo the data back to the client */
        printf("Echoing it back to client...\n");
        if(send_all(accept_sd, buffer, (size_t)nread) < 0)
        {
            perror("Iserver - send() error");
            close(listen_sd);
            close(accept_sd);
            exit(-1);
        }
        printf("Iserver - send() is OK.\n");

        /* Close the incoming connection */
        close(accept_sd);
    }

    /* Close the listen socket */
    close(listen_sd);
    return 0;
}

Concurrent server (using fork)


/*
 * Accept loop of the fork-per-client server: the parent only accepts
 * connections, each child handles exactly one client and then exits.
 * Uses listener, remoteaddr, addrlen, newfd, pid and buf, all declared
 * outside this fragment (presumably buf is a char array and remoteaddr a
 * struct sockaddr_in - confirm against the full program).
 * NOTE: terminated children stay around as zombies until the parent
 * waits for them; the full program must arrange for that.
 */
while(1){
    /* addrlen is a value-result argument: reset it before every accept() */
    addrlen = sizeof(remoteaddr);
    if ((newfd = accept(listener, (struct sockaddr *)&remoteaddr,
                        &addrlen)) == -1)
    {
        perror("accept");
        continue;
    }

    //fork a new child whenever we have a new client connection
    pid=fork();
    if(pid < 0){
        perror("fork");
        close(newfd);
        continue;
    }

    if(pid==0){ //handle the client
        ssize_t nread;

        /* the child never accepts, so it does not need the listening socket */
        close(listener);

        printf("new connection from %s on socket %d\n",
               inet_ntoa(remoteaddr.sin_addr),newfd);

        /* leave room for the terminating NUL: read() does not add one */
        nread = read(newfd,buf,sizeof(buf) - 1);
        if(nread < 0){
            perror("read");
            close(newfd);
            exit(1);
        }
        buf[nread] = '\0';

        printf("Recieved %zd bytes from the client", nread);
        printf("%s",buf); //do any other tasks as needed
        close(newfd);
        exit(0);
    }

    /* parent: the child owns the connection now; drop our copy so the
       descriptor is not leaked and the client sees EOF when the child exits */
    close(newfd);
}

THANK YOU

You might also like