diff options
author | Remko Tronçon <git@el-tramo.be> | 2009-12-03 20:06:33 (GMT) |
---|---|---|
committer | Remko Tronçon <git@el-tramo.be> | 2009-12-03 20:06:33 (GMT) |
commit | 5df50c35e1baeee517a0895a2c8a2cc2052b087f (patch) | |
tree | b9d90fd123ec251ebb930cfcfaa2ffc805433812 /3rdParty/CAres/src/ares_process.c | |
parent | 9b3edba27b8683f1a87ad66ee05802dd93bbbdfc (diff) | |
download | swift-contrib-5df50c35e1baeee517a0895a2c8a2cc2052b087f.zip swift-contrib-5df50c35e1baeee517a0895a2c8a2cc2052b087f.tar.bz2 |
Added C-Ares.
Diffstat (limited to '3rdParty/CAres/src/ares_process.c')
-rw-r--r-- | 3rdParty/CAres/src/ares_process.c | 1170 |
1 files changed, 1170 insertions, 0 deletions
diff --git a/3rdParty/CAres/src/ares_process.c b/3rdParty/CAres/src/ares_process.c new file mode 100644 index 0000000..6182ccc --- /dev/null +++ b/3rdParty/CAres/src/ares_process.c @@ -0,0 +1,1170 @@ +/* $Id: ares_process.c,v 1.79 2009-11-18 10:33:54 yangtse Exp $ */ + +/* Copyright 1998 by the Massachusetts Institute of Technology. + * Copyright (C) 2004-2009 by Daniel Stenberg + * + * Permission to use, copy, modify, and distribute this + * software and its documentation for any purpose and without + * fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting + * documentation, and that the name of M.I.T. not be used in + * advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. + * M.I.T. makes no representations about the suitability of + * this software for any purpose. It is provided "as is" + * without express or implied warranty. + */ + +#include "ares_setup.h" + +#ifdef HAVE_SYS_SOCKET_H +# include <sys/socket.h> +#endif +#ifdef HAVE_SYS_UIO_H +# include <sys/uio.h> +#endif +#ifdef HAVE_NETINET_IN_H +# include <netinet/in.h> +#endif +#ifdef HAVE_NETINET_TCP_H +# include <netinet/tcp.h> +#endif +#ifdef HAVE_NETDB_H +# include <netdb.h> +#endif +#ifdef HAVE_ARPA_NAMESER_H +# include <arpa/nameser.h> +#else +# include "nameser.h" +#endif +#ifdef HAVE_ARPA_NAMESER_COMPAT_H +# include <arpa/nameser_compat.h> +#endif + +#ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +#endif + +#ifdef HAVE_STRINGS_H +# include <strings.h> +#endif +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif +#ifdef HAVE_SYS_IOCTL_H +# include <sys/ioctl.h> +#endif +#ifdef NETWARE +# include <sys/filio.h> +#endif + +#include <assert.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> +#include <time.h> +#include <errno.h> + +#include "ares.h" +#include "ares_dns.h" +#include "ares_private.h" + + +static int try_again(int errnum); +static void write_tcp_data(ares_channel channel, fd_set *write_fds, + ares_socket_t write_fd, struct timeval *now); +static void read_tcp_data(ares_channel channel, fd_set *read_fds, + ares_socket_t read_fd, struct timeval *now); +static void read_udp_packets(ares_channel channel, fd_set *read_fds, + ares_socket_t read_fd, struct timeval *now); +static void advance_tcp_send_queue(ares_channel channel, int whichserver, + ssize_t num_bytes); +static void process_timeouts(ares_channel channel, struct timeval *now); +static void process_broken_connections(ares_channel channel, + struct timeval *now); +static void process_answer(ares_channel channel, unsigned char *abuf, + int alen, int whichserver, int tcp, + struct timeval *now); +static void handle_error(ares_channel channel, int whichserver, + struct timeval *now); +static void skip_server(ares_channel channel, struct query *query, + int whichserver); +static void next_server(ares_channel channel, struct query *query, + struct timeval *now); +static int configure_socket(int s, ares_channel channel); +static int open_tcp_socket(ares_channel channel, struct server_state *server); +static int open_udp_socket(ares_channel channel, struct server_state *server); +static int same_questions(const unsigned char *qbuf, int qlen, + const unsigned char *abuf, int alen); +static void end_query(ares_channel channel, struct query *query, int status, + unsigned char *abuf, int alen); + +/* return true if now is exactly check time or later */ +int ares__timedout(struct timeval *now, + struct timeval *check) +{ + long secs = (now->tv_sec - check->tv_sec); + + if(secs > 0) + return 1; /* yes, timed out */ + if(secs < 0) + return 0; /* nope, not timed out */ + + /* if the full seconds were identical, check the sub second parts */ + return (now->tv_usec - check->tv_usec >= 0); +} + +/* add the specific number of milliseconds to the time in the first argument */ +int ares__timeadd(struct timeval *now, + int millisecs) +{ + now->tv_sec += millisecs/1000; + now->tv_usec += (millisecs%1000)*1000; + + if(now->tv_usec >= 1000000) { + ++(now->tv_sec); + now->tv_usec -= 1000000; + } + + return 0; +} + +/* return time offset between now and (future) check, in milliseconds */ +long ares__timeoffset(struct timeval *now, + struct timeval *check) +{ + return (check->tv_sec - now->tv_sec)*1000 + + (check->tv_usec - now->tv_usec)/1000; +} + + +/* Something interesting happened on the wire, or there was a timeout. + * See what's up and respond accordingly. + */ +void ares_process(ares_channel channel, fd_set *read_fds, fd_set *write_fds) +{ + struct timeval now = ares__tvnow(); + + write_tcp_data(channel, write_fds, ARES_SOCKET_BAD, &now); + read_tcp_data(channel, read_fds, ARES_SOCKET_BAD, &now); + read_udp_packets(channel, read_fds, ARES_SOCKET_BAD, &now); + process_timeouts(channel, &now); + process_broken_connections(channel, &now); +} + +/* Something interesting happened on the wire, or there was a timeout. + * See what's up and respond accordingly. + */ +void ares_process_fd(ares_channel channel, + ares_socket_t read_fd, /* use ARES_SOCKET_BAD or valid + file descriptors */ + ares_socket_t write_fd) +{ + struct timeval now = ares__tvnow(); + + write_tcp_data(channel, NULL, write_fd, &now); + read_tcp_data(channel, NULL, read_fd, &now); + read_udp_packets(channel, NULL, read_fd, &now); + process_timeouts(channel, &now); +} + + +/* Return 1 if the specified error number describes a readiness error, or 0 + * otherwise. This is mostly for HP-UX, which could return EAGAIN or + * EWOULDBLOCK. See this man page + * + * http://devrsrc1.external.hp.com/STKS/cgi-bin/man2html?manpage=/usr/share/man/man2.Z/send.2 + */ +static int try_again(int errnum) +{ +#if !defined EWOULDBLOCK && !defined EAGAIN +#error "Neither EWOULDBLOCK nor EAGAIN defined" +#endif + switch (errnum) + { +#ifdef EWOULDBLOCK + case EWOULDBLOCK: + return 1; +#endif +#if defined EAGAIN && EAGAIN != EWOULDBLOCK + case EAGAIN: + return 1; +#endif + } + return 0; +} + +/* If any TCP sockets select true for writing, write out queued data + * we have for them. + */ +static void write_tcp_data(ares_channel channel, + fd_set *write_fds, + ares_socket_t write_fd, + struct timeval *now) +{ + struct server_state *server; + struct send_request *sendreq; + struct iovec *vec; + int i; + ssize_t scount; + ssize_t wcount; + size_t n; + + if(!write_fds && (write_fd == ARES_SOCKET_BAD)) + /* no possible action */ + return; + + for (i = 0; i < channel->nservers; i++) + { + /* Make sure server has data to send and is selected in write_fds or + write_fd. */ + server = &channel->servers[i]; + if (!server->qhead || server->tcp_socket == ARES_SOCKET_BAD || + server->is_broken) + continue; + + if(write_fds) { + if(!FD_ISSET(server->tcp_socket, write_fds)) + continue; + } + else { + if(server->tcp_socket != write_fd) + continue; + } + + if(write_fds) + /* If there's an error and we close this socket, then open + * another with the same fd to talk to another server, then we + * don't want to think that it was the new socket that was + * ready. This is not disastrous, but is likely to result in + * extra system calls and confusion. */ + FD_CLR(server->tcp_socket, write_fds); + + /* Count the number of send queue items. */ + n = 0; + for (sendreq = server->qhead; sendreq; sendreq = sendreq->next) + n++; + + /* Allocate iovecs so we can send all our data at once. */ + vec = malloc(n * sizeof(struct iovec)); + if (vec) + { + /* Fill in the iovecs and send. */ + n = 0; + for (sendreq = server->qhead; sendreq; sendreq = sendreq->next) + { + vec[n].iov_base = (char *) sendreq->data; + vec[n].iov_len = sendreq->len; + n++; + } + wcount = (ssize_t)writev(server->tcp_socket, vec, (int)n); + free(vec); + if (wcount < 0) + { + if (!try_again(SOCKERRNO)) + handle_error(channel, i, now); + continue; + } + + /* Advance the send queue by as many bytes as we sent. */ + advance_tcp_send_queue(channel, i, wcount); + } + else + { + /* Can't allocate iovecs; just send the first request. */ + sendreq = server->qhead; + + scount = swrite(server->tcp_socket, sendreq->data, sendreq->len); + if (scount < 0) + { + if (!try_again(SOCKERRNO)) + handle_error(channel, i, now); + continue; + } + + /* Advance the send queue by as many bytes as we sent. */ + advance_tcp_send_queue(channel, i, scount); + } + } +} + +/* Consume the given number of bytes from the head of the TCP send queue. */ +static void advance_tcp_send_queue(ares_channel channel, int whichserver, + ssize_t num_bytes) +{ + struct send_request *sendreq; + struct server_state *server = &channel->servers[whichserver]; + while (num_bytes > 0) + { + sendreq = server->qhead; + if ((size_t)num_bytes >= sendreq->len) + { + num_bytes -= sendreq->len; + server->qhead = sendreq->next; + if (server->qhead == NULL) + { + SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 0); + server->qtail = NULL; + } + if (sendreq->data_storage != NULL) + free(sendreq->data_storage); + free(sendreq); + } + else + { + sendreq->data += num_bytes; + sendreq->len -= num_bytes; + num_bytes = 0; + } + } +} + +/* If any TCP socket selects true for reading, read some data, + * allocate a buffer if we finish reading the length word, and process + * a packet if we finish reading one. + */ +static void read_tcp_data(ares_channel channel, fd_set *read_fds, + ares_socket_t read_fd, struct timeval *now) +{ + struct server_state *server; + int i; + ssize_t count; + + if(!read_fds && (read_fd == ARES_SOCKET_BAD)) + /* no possible action */ + return; + + for (i = 0; i < channel->nservers; i++) + { + /* Make sure the server has a socket and is selected in read_fds. */ + server = &channel->servers[i]; + if (server->tcp_socket == ARES_SOCKET_BAD || server->is_broken) + continue; + + if(read_fds) { + if(!FD_ISSET(server->tcp_socket, read_fds)) + continue; + } + else { + if(server->tcp_socket != read_fd) + continue; + } + + if(read_fds) + /* If there's an error and we close this socket, then open + * another with the same fd to talk to another server, then we + * don't want to think that it was the new socket that was + * ready. This is not disastrous, but is likely to result in + * extra system calls and confusion. */ + FD_CLR(server->tcp_socket, read_fds); + + if (server->tcp_lenbuf_pos != 2) + { + /* We haven't yet read a length word, so read that (or + * what's left to read of it). + */ + count = sread(server->tcp_socket, + server->tcp_lenbuf + server->tcp_lenbuf_pos, + 2 - server->tcp_lenbuf_pos); + if (count <= 0) + { + if (!(count == -1 && try_again(SOCKERRNO))) + handle_error(channel, i, now); + continue; + } + + server->tcp_lenbuf_pos += (int)count; + if (server->tcp_lenbuf_pos == 2) + { + /* We finished reading the length word. Decode the + * length and allocate a buffer for the data. + */ + server->tcp_length = server->tcp_lenbuf[0] << 8 + | server->tcp_lenbuf[1]; + server->tcp_buffer = malloc(server->tcp_length); + if (!server->tcp_buffer) + handle_error(channel, i, now); + server->tcp_buffer_pos = 0; + } + } + else + { + /* Read data into the allocated buffer. */ + count = sread(server->tcp_socket, + server->tcp_buffer + server->tcp_buffer_pos, + server->tcp_length - server->tcp_buffer_pos); + if (count <= 0) + { + if (!(count == -1 && try_again(SOCKERRNO))) + handle_error(channel, i, now); + continue; + } + + server->tcp_buffer_pos += (int)count; + if (server->tcp_buffer_pos == server->tcp_length) + { + /* We finished reading this answer; process it and + * prepare to read another length word. + */ + process_answer(channel, server->tcp_buffer, server->tcp_length, + i, 1, now); + if (server->tcp_buffer) + free(server->tcp_buffer); + server->tcp_buffer = NULL; + server->tcp_lenbuf_pos = 0; + server->tcp_buffer_pos = 0; + } + } + } +} + +/* If any UDP sockets select true for reading, process them. */ +static void read_udp_packets(ares_channel channel, fd_set *read_fds, + ares_socket_t read_fd, struct timeval *now) +{ + struct server_state *server; + int i; + ssize_t count; + unsigned char buf[PACKETSZ + 1]; +#ifdef HAVE_RECVFROM + struct sockaddr_in from; + ares_socklen_t fromlen; +#endif + + if(!read_fds && (read_fd == ARES_SOCKET_BAD)) + /* no possible action */ + return; + + for (i = 0; i < channel->nservers; i++) + { + /* Make sure the server has a socket and is selected in read_fds. */ + server = &channel->servers[i]; + + if (server->udp_socket == ARES_SOCKET_BAD || server->is_broken) + continue; + + if(read_fds) { + if(!FD_ISSET(server->udp_socket, read_fds)) + continue; + } + else { + if(server->udp_socket != read_fd) + continue; + } + + if(read_fds) + /* If there's an error and we close this socket, then open + * another with the same fd to talk to another server, then we + * don't want to think that it was the new socket that was + * ready. This is not disastrous, but is likely to result in + * extra system calls and confusion. */ + FD_CLR(server->udp_socket, read_fds); + + /* To reduce event loop overhead, read and process as many + * packets as we can. */ + do { +#ifdef HAVE_RECVFROM + fromlen = sizeof(from); + count = (ssize_t)recvfrom(server->udp_socket, (void *)buf, sizeof(buf), + 0, (struct sockaddr *)&from, &fromlen); +#else + count = sread(server->udp_socket, buf, sizeof(buf)); +#endif + if (count == -1 && try_again(SOCKERRNO)) + continue; + else if (count <= 0) + handle_error(channel, i, now); +#ifdef HAVE_RECVFROM + else if (from.sin_addr.s_addr != server->addr.s_addr) + /* Address response came from did not match the address + * we sent the request to. Someone may be attempting + * to perform a cache poisoning attack */ + break; +#endif + else + process_answer(channel, buf, (int)count, i, 0, now); + } while (count > 0); + } +} + +/* If any queries have timed out, note the timeout and move them on. */ +static void process_timeouts(ares_channel channel, struct timeval *now) +{ + time_t t; /* the time of the timeouts we're processing */ + struct query *query; + struct list_node* list_head; + struct list_node* list_node; + + /* Process all the timeouts that have fired since the last time we + * processed timeouts. If things are going well, then we'll have + * hundreds/thousands of queries that fall into future buckets, and + * only a handful of requests that fall into the "now" bucket, so + * this should be quite quick. + */ + for (t = channel->last_timeout_processed; t <= now->tv_sec; t++) + { + list_head = &(channel->queries_by_timeout[t % ARES_TIMEOUT_TABLE_SIZE]); + for (list_node = list_head->next; list_node != list_head; ) + { + query = list_node->data; + list_node = list_node->next; /* in case the query gets deleted */ + if (query->timeout.tv_sec && ares__timedout(now, &query->timeout)) + { + query->error_status = ARES_ETIMEOUT; + ++query->timeouts; + next_server(channel, query, now); + } + } + } + channel->last_timeout_processed = now->tv_sec; +} + +/* Handle an answer from a server. */ +static void process_answer(ares_channel channel, unsigned char *abuf, + int alen, int whichserver, int tcp, + struct timeval *now) +{ + int tc, rcode; + unsigned short id; + struct query *query; + struct list_node* list_head; + struct list_node* list_node; + + /* If there's no room in the answer for a header, we can't do much + * with it. */ + if (alen < HFIXEDSZ) + return; + + /* Grab the query ID, truncate bit, and response code from the packet. */ + id = DNS_HEADER_QID(abuf); + tc = DNS_HEADER_TC(abuf); + rcode = DNS_HEADER_RCODE(abuf); + + /* Find the query corresponding to this packet. The queries are + * hashed/bucketed by query id, so this lookup should be quick. + * Note that both the query id and the questions must be the same; + * when the query id wraps around we can have multiple outstanding + * queries with the same query id, so we need to check both the id and + * question. + */ + query = NULL; + list_head = &(channel->queries_by_qid[id % ARES_QID_TABLE_SIZE]); + for (list_node = list_head->next; list_node != list_head; + list_node = list_node->next) + { + struct query *q = list_node->data; + if ((q->qid == id) && same_questions(q->qbuf, q->qlen, abuf, alen)) + { + query = q; + break; + } + } + if (!query) + return; + + /* If we got a truncated UDP packet and are not ignoring truncation, + * don't accept the packet, and switch the query to TCP if we hadn't + * done so already. + */ + if ((tc || alen > PACKETSZ) && !tcp && !(channel->flags & ARES_FLAG_IGNTC)) + { + if (!query->using_tcp) + { + query->using_tcp = 1; + ares__send_query(channel, query, now); + } + return; + } + + /* Limit alen to PACKETSZ if we aren't using TCP (only relevant if we + * are ignoring truncation. + */ + if (alen > PACKETSZ && !tcp) + alen = PACKETSZ; + + /* If we aren't passing through all error packets, discard packets + * with SERVFAIL, NOTIMP, or REFUSED response codes. + */ + if (!(channel->flags & ARES_FLAG_NOCHECKRESP)) + { + if (rcode == SERVFAIL || rcode == NOTIMP || rcode == REFUSED) + { + skip_server(channel, query, whichserver); + if (query->server == whichserver) + next_server(channel, query, now); + return; + } + } + + end_query(channel, query, ARES_SUCCESS, abuf, alen); +} + +/* Close all the connections that are no longer usable. */ +static void process_broken_connections(ares_channel channel, + struct timeval *now) +{ + int i; + for (i = 0; i < channel->nservers; i++) + { + struct server_state *server = &channel->servers[i]; + if (server->is_broken) + { + handle_error(channel, i, now); + } + } +} + +static void handle_error(ares_channel channel, int whichserver, + struct timeval *now) +{ + struct server_state *server; + struct query *query; + struct list_node list_head; + struct list_node* list_node; + + server = &channel->servers[whichserver]; + + /* Reset communications with this server. */ + ares__close_sockets(channel, server); + + /* Tell all queries talking to this server to move on and not try + * this server again. We steal the current list of queries that were + * in-flight to this server, since when we call next_server this can + * cause the queries to be re-sent to this server, which will + * re-insert these queries in that same server->queries_to_server + * list. + */ + ares__init_list_head(&list_head); + ares__swap_lists(&list_head, &(server->queries_to_server)); + for (list_node = list_head.next; list_node != &list_head; ) + { + query = list_node->data; + list_node = list_node->next; /* in case the query gets deleted */ + assert(query->server == whichserver); + skip_server(channel, query, whichserver); + next_server(channel, query, now); + } + /* Each query should have removed itself from our temporary list as + * it re-sent itself or finished up... + */ + assert(ares__is_list_empty(&list_head)); +} + +static void skip_server(ares_channel channel, struct query *query, + int whichserver) { + /* The given server gave us problems with this query, so if we have + * the luxury of using other servers, then let's skip the + * potentially broken server and just use the others. If we only + * have one server and we need to retry then we should just go ahead + * and re-use that server, since it's our only hope; perhaps we + * just got unlucky, and retrying will work (eg, the server timed + * out our TCP connection just as we were sending another request). + */ + if (channel->nservers > 1) + { + query->server_info[whichserver].skip_server = 1; + } +} + +static void next_server(ares_channel channel, struct query *query, + struct timeval *now) +{ + /* We need to try each server channel->tries times. We have channel->nservers + * servers to try. In total, we need to do channel->nservers * channel->tries + * attempts. Use query->try to remember how many times we already attempted + * this query. Use modular arithmetic to find the next server to try. */ + while (++(query->try) < (channel->nservers * channel->tries)) + { + struct server_state *server; + + /* Move on to the next server. */ + query->server = (query->server + 1) % channel->nservers; + server = &channel->servers[query->server]; + + /* We don't want to use this server if (1) we decided this + * connection is broken, and thus about to be closed, (2) + * we've decided to skip this server because of earlier + * errors we encountered, or (3) we already sent this query + * over this exact connection. + */ + if (!server->is_broken && + !query->server_info[query->server].skip_server && + !(query->using_tcp && + (query->server_info[query->server].tcp_connection_generation == + server->tcp_connection_generation))) + { + ares__send_query(channel, query, now); + return; + } + + /* You might think that with TCP we only need one try. However, + * even when using TCP, servers can time-out our connection just + * as we're sending a request, or close our connection because + * they die, or never send us a reply because they get wedged or + * tickle a bug that drops our request. + */ + } + + /* If we are here, all attempts to perform query failed. */ + end_query(channel, query, query->error_status, NULL, 0); +} + +void ares__send_query(ares_channel channel, struct query *query, + struct timeval *now) +{ + struct send_request *sendreq; + struct server_state *server; + int timeplus; + + server = &channel->servers[query->server]; + if (query->using_tcp) + { + /* Make sure the TCP socket for this server is set up and queue + * a send request. + */ + if (server->tcp_socket == ARES_SOCKET_BAD) + { + if (open_tcp_socket(channel, server) == -1) + { + skip_server(channel, query, query->server); + next_server(channel, query, now); + return; + } + } + sendreq = calloc(1, sizeof(struct send_request)); + if (!sendreq) + { + end_query(channel, query, ARES_ENOMEM, NULL, 0); + return; + } + /* To make the common case fast, we avoid copies by using the + * query's tcpbuf for as long as the query is alive. In the rare + * case where the query ends while it's queued for transmission, + * then we give the sendreq its own copy of the request packet + * and put it in sendreq->data_storage. + */ + sendreq->data_storage = NULL; + sendreq->data = query->tcpbuf; + sendreq->len = query->tcplen; + sendreq->owner_query = query; + sendreq->next = NULL; + if (server->qtail) + server->qtail->next = sendreq; + else + { + SOCK_STATE_CALLBACK(channel, server->tcp_socket, 1, 1); + server->qhead = sendreq; + } + server->qtail = sendreq; + query->server_info[query->server].tcp_connection_generation = + server->tcp_connection_generation; + } + else + { + if (server->udp_socket == ARES_SOCKET_BAD) + { + if (open_udp_socket(channel, server) == -1) + { + skip_server(channel, query, query->server); + next_server(channel, query, now); + return; + } + } + if (swrite(server->udp_socket, query->qbuf, query->qlen) == -1) + { + /* FIXME: Handle EAGAIN here since it likely can happen. */ + skip_server(channel, query, query->server); + next_server(channel, query, now); + return; + } + } + timeplus = channel->timeout << (query->try / channel->nservers); + timeplus = (timeplus * (9 + (rand () & 7))) / 16; + query->timeout = *now; + ares__timeadd(&query->timeout, + timeplus); + /* Keep track of queries bucketed by timeout, so we can process + * timeout events quickly. + */ + ares__remove_from_list(&(query->queries_by_timeout)); + ares__insert_in_list( + &(query->queries_by_timeout), + &(channel->queries_by_timeout[query->timeout.tv_sec % + ARES_TIMEOUT_TABLE_SIZE])); + + /* Keep track of queries bucketed by server, so we can process server + * errors quickly. + */ + ares__remove_from_list(&(query->queries_to_server)); + ares__insert_in_list(&(query->queries_to_server), + &(server->queries_to_server)); +} + +/* + * setsocknonblock sets the given socket to either blocking or non-blocking mode + * based on the 'nonblock' boolean argument. This function is highly portable. + */ +static int setsocknonblock(ares_socket_t sockfd, /* operate on this */ + int nonblock /* TRUE or FALSE */) +{ +#if defined(USE_BLOCKING_SOCKETS) + + return 0; /* returns success */ + +#elif defined(HAVE_FCNTL_O_NONBLOCK) + + /* most recent unix versions */ + int flags; + flags = fcntl(sockfd, F_GETFL, 0); + if (FALSE != nonblock) + return fcntl(sockfd, F_SETFL, flags | O_NONBLOCK); + else + return fcntl(sockfd, F_SETFL, flags & (~O_NONBLOCK)); + +#elif defined(HAVE_IOCTL_FIONBIO) + + /* older unix versions */ + int flags; + flags = nonblock; + return ioctl(sockfd, FIONBIO, &flags); + +#elif defined(HAVE_IOCTLSOCKET_FIONBIO) + +#ifdef WATT32 + char flags; +#else + /* Windows */ + unsigned long flags; +#endif + flags = nonblock; + return ioctlsocket(sockfd, FIONBIO, &flags); + +#elif defined(HAVE_IOCTLSOCKET_CAMEL_FIONBIO) + + /* Amiga */ + return IoctlSocket(sockfd, FIONBIO, (long)nonblock); + +#elif defined(HAVE_SETSOCKOPT_SO_NONBLOCK) + + /* BeOS */ + long b = nonblock ? 1 : 0; + return setsockopt(sockfd, SOL_SOCKET, SO_NONBLOCK, &b, sizeof(b)); + +#else +# error "no non-blocking method was found/used/set" +#endif +} + +static int configure_socket(int s, ares_channel channel) +{ + setsocknonblock(s, TRUE); + +#if defined(FD_CLOEXEC) && !defined(MSDOS) + /* Configure the socket fd as close-on-exec. */ + if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1) + return -1; +#endif + + /* Set the socket's send and receive buffer sizes. */ + if ((channel->socket_send_buffer_size > 0) && + setsockopt(s, SOL_SOCKET, SO_SNDBUF, + (void *)&channel->socket_send_buffer_size, + sizeof(channel->socket_send_buffer_size)) == -1) + return -1; + + if ((channel->socket_receive_buffer_size > 0) && + setsockopt(s, SOL_SOCKET, SO_RCVBUF, + (void *)&channel->socket_receive_buffer_size, + sizeof(channel->socket_receive_buffer_size)) == -1) + return -1; + + return 0; + } + +static int open_tcp_socket(ares_channel channel, struct server_state *server) +{ + ares_socket_t s; + int opt; + struct sockaddr_in sockin; + + /* Acquire a socket. */ + s = socket(AF_INET, SOCK_STREAM, 0); + if (s == ARES_SOCKET_BAD) + return -1; + + /* Configure it. */ + if (configure_socket(s, channel) < 0) + { + sclose(s); + return -1; + } + +#ifdef TCP_NODELAY + /* + * Disable the Nagle algorithm (only relevant for TCP sockets, and thus not in + * configure_socket). In general, in DNS lookups we're pretty much interested + * in firing off a single request and then waiting for a reply, so batching + * isn't very interesting in general. + */ + opt = 1; + if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, + (void *)&opt, sizeof(opt)) == -1) + { + sclose(s); + return -1; + } +#endif + + /* Connect to the server. */ + memset(&sockin, 0, sizeof(sockin)); + sockin.sin_family = AF_INET; + sockin.sin_addr = server->addr; + sockin.sin_port = (unsigned short)(channel->tcp_port & 0xffff); + if (connect(s, (struct sockaddr *) &sockin, sizeof(sockin)) == -1) + { + int err = SOCKERRNO; + + if (err != EINPROGRESS && err != EWOULDBLOCK) + { + sclose(s); + return -1; + } + } + + if (channel->sock_create_cb) + { + int err = channel->sock_create_cb(s, SOCK_STREAM, + channel->sock_create_cb_data); + if (err < 0) + { + sclose(s); + return err; + } + } + + SOCK_STATE_CALLBACK(channel, s, 1, 0); + server->tcp_buffer_pos = 0; + server->tcp_socket = s; + server->tcp_connection_generation = ++channel->tcp_connection_generation; + return 0; +} + +static int open_udp_socket(ares_channel channel, struct server_state *server) +{ + ares_socket_t s; + struct sockaddr_in sockin; + + /* Acquire a socket. */ + s = socket(AF_INET, SOCK_DGRAM, 0); + if (s == ARES_SOCKET_BAD) + return -1; + + /* Set the socket non-blocking. */ + if (configure_socket(s, channel) < 0) + { + sclose(s); + return -1; + } + + /* Connect to the server. */ + memset(&sockin, 0, sizeof(sockin)); + sockin.sin_family = AF_INET; + sockin.sin_addr = server->addr; + sockin.sin_port = (unsigned short)(channel->udp_port & 0xffff); + if (connect(s, (struct sockaddr *) &sockin, sizeof(sockin)) == -1) + { + int err = SOCKERRNO; + + if (err != EINPROGRESS && err != EWOULDBLOCK) + { + sclose(s); + return -1; + } + } + + if (channel->sock_create_cb) + { + int err = channel->sock_create_cb(s, SOCK_DGRAM, + channel->sock_create_cb_data); + if (err < 0) + { + sclose(s); + return err; + } + } + + SOCK_STATE_CALLBACK(channel, s, 1, 0); + + server->udp_socket = s; + return 0; +} + +static int same_questions(const unsigned char *qbuf, int qlen, + const unsigned char *abuf, int alen) +{ + struct { + const unsigned char *p; + int qdcount; + char *name; + long namelen; + int type; + int dnsclass; + } q, a; + int i, j; + + if (qlen < HFIXEDSZ || alen < HFIXEDSZ) + return 0; + + /* Extract qdcount from the request and reply buffers and compare them. */ + q.qdcount = DNS_HEADER_QDCOUNT(qbuf); + a.qdcount = DNS_HEADER_QDCOUNT(abuf); + if (q.qdcount != a.qdcount) + return 0; + + /* For each question in qbuf, find it in abuf. */ + q.p = qbuf + HFIXEDSZ; + for (i = 0; i < q.qdcount; i++) + { + /* Decode the question in the query. */ + if (ares_expand_name(q.p, qbuf, qlen, &q.name, &q.namelen) + != ARES_SUCCESS) + return 0; + q.p += q.namelen; + if (q.p + QFIXEDSZ > qbuf + qlen) + { + free(q.name); + return 0; + } + q.type = DNS_QUESTION_TYPE(q.p); + q.dnsclass = DNS_QUESTION_CLASS(q.p); + q.p += QFIXEDSZ; + + /* Search for this question in the answer. */ + a.p = abuf + HFIXEDSZ; + for (j = 0; j < a.qdcount; j++) + { + /* Decode the question in the answer. */ + if (ares_expand_name(a.p, abuf, alen, &a.name, &a.namelen) + != ARES_SUCCESS) + { + free(q.name); + return 0; + } + a.p += a.namelen; + if (a.p + QFIXEDSZ > abuf + alen) + { + free(q.name); + free(a.name); + return 0; + } + a.type = DNS_QUESTION_TYPE(a.p); + a.dnsclass = DNS_QUESTION_CLASS(a.p); + a.p += QFIXEDSZ; + + /* Compare the decoded questions. */ + if (strcasecmp(q.name, a.name) == 0 && q.type == a.type + && q.dnsclass == a.dnsclass) + { + free(a.name); + break; + } + free(a.name); + } + + free(q.name); + if (j == a.qdcount) + return 0; + } + return 1; +} + +static void end_query (ares_channel channel, struct query *query, int status, + unsigned char *abuf, int alen) +{ + int i; + + /* First we check to see if this query ended while one of our send + * queues still has pointers to it. + */ + for (i = 0; i < channel->nservers; i++) + { + struct server_state *server = &channel->servers[i]; + struct send_request *sendreq; + for (sendreq = server->qhead; sendreq; sendreq = sendreq->next) + if (sendreq->owner_query == query) + { + sendreq->owner_query = NULL; + assert(sendreq->data_storage == NULL); + if (status == ARES_SUCCESS) + { + /* We got a reply for this query, but this queued + * sendreq points into this soon-to-be-gone query's + * tcpbuf. Probably this means we timed out and queued + * the query for retransmission, then received a + * response before actually retransmitting. This is + * perfectly fine, so we want to keep the connection + * running smoothly if we can. But in the worst case + * we may have sent only some prefix of the query, + * with some suffix of the query left to send. Also, + * the buffer may be queued on multiple queues. To + * prevent dangling pointers to the query's tcpbuf and + * handle these cases, we just give such sendreqs + * their own copy of the query packet. + */ + sendreq->data_storage = malloc(sendreq->len); + if (sendreq->data_storage != NULL) + { + memcpy(sendreq->data_storage, sendreq->data, sendreq->len); + sendreq->data = sendreq->data_storage; + } + } + if ((status != ARES_SUCCESS) || (sendreq->data_storage == NULL)) + { + /* We encountered an error (probably a timeout, + * suggesting the DNS server we're talking to is + * probably unreachable, wedged, or severely + * overloaded) or we couldn't copy the request, so + * mark the connection as broken. When we get to + * process_broken_connections() we'll close the + * connection and try to re-send requests to another + * server. + */ + server->is_broken = 1; + /* Just to be paranoid, zero out this sendreq... */ + sendreq->data = NULL; + sendreq->len = 0; + } + } + } + + /* Invoke the callback */ + query->callback(query->arg, status, query->timeouts, abuf, alen); + ares__free_query(query); + + /* Simple cleanup policy: if no queries are remaining, close all + * network sockets unless STAYOPEN is set. + */ + if (!(channel->flags & ARES_FLAG_STAYOPEN) && + ares__is_list_empty(&(channel->all_queries))) + { + for (i = 0; i < channel->nservers; i++) + ares__close_sockets(channel, &channel->servers[i]); + } +} + +void ares__free_query(struct query *query) +{ + /* Remove the query from all the lists in which it is linked */ + ares__remove_from_list(&(query->queries_by_qid)); + ares__remove_from_list(&(query->queries_by_timeout)); + ares__remove_from_list(&(query->queries_to_server)); + ares__remove_from_list(&(query->all_queries)); + /* Zero out some important stuff, to help catch bugs */ + query->callback = NULL; + query->arg = NULL; + /* Deallocate the memory associated with the query */ + free(query->tcpbuf); + free(query->server_info); + free(query); +} |