/*++
/* NAME
/* events 3
/* SUMMARY
/* event manager
/* SYNOPSIS
/* #include <events.h>
/*
/* time_t event_time()
/*
/* void event_loop(delay)
/* int delay;
/*
/* time_t event_request_timer(callback, context, delay)
/* void (*callback)(int event, void *context);
/* void *context;
/* int delay;
/*
/* int event_cancel_timer(callback, context)
/* void (*callback)(int event, void *context);
/* void *context;
/*
/* void event_enable_read(fd, callback, context)
/* int fd;
/* void (*callback)(int event, void *context);
/* void *context;
/*
/* void event_enable_write(fd, callback, context)
/* int fd;
/* void (*callback)(int event, void *context);
/* void *context;
/*
/* void event_disable_readwrite(fd)
/* int fd;
/*
/* void event_drain(time_limit)
/* int time_limit;
/*
/* void event_fork(void)
/* DESCRIPTION
/* This module delivers I/O and timer events.
/* Multiple I/O streams and timers can be monitored simultaneously.
/* Events are delivered via callback routines provided by the
/* application. When requesting an event, the application can provide
/* private context that is passed back when the callback routine is
/* executed.
/*
/* event_time() returns a cached value of the current time.
/*
/* event_loop() monitors all I/O channels for which the application has
/* expressed interest, and monitors the timer request queue.
/* It notifies the application whenever events of interest happen.
/* A negative delay value causes the function to pause until something
/* happens; a positive delay value causes event_loop() to return when
/* the next event happens or when the delay time in seconds is over,
/* whichever happens first. A zero delay effectuates a poll.
/*
/* Note: in order to avoid race conditions, event_loop() must
/* not be called recursively.
/*
/* event_request_timer() causes the specified callback function to
/* be called with the specified context argument after \fIdelay\fR
/* seconds, or as soon as possible thereafter. The delay should
/* not be negative (the manifest EVENT_NULL_DELAY provides for
/* convenient zero-delay notification).
/* The event argument is equal to EVENT_TIME.
/* Only one timer request can be active per (callback, context) pair.
/* Calling event_request_timer() with an existing (callback, context)
/* pair does not schedule a new event, but updates the time of event
/* delivery. The result is the absolute time at which the timer is
/* scheduled to go off.
/*
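/* For example (idle_timeout() and session are hypothetical names),
/* re-issuing the request whenever the session shows activity pushes
/* the deadline another 300 seconds into the future:
/* .nf
/*
/*	event_request_timer(idle_timeout, (void *) session, 300);
/* .fi
/*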
/* event_cancel_timer() cancels the specified (callback, context) request.
/* The application is allowed to cancel non-existing requests. The result
/* value is the amount of time left before the timer would have gone off,
/* or -1 in case of no pending timer.
/*
/* event_enable_read() (event_enable_write()) enables read (write) events
/* on the named I/O channel. It is up to the application to assemble
/* partial reads or writes.
/* An I/O channel cannot handle more than one request at the
/* same time. The application is allowed to enable an event that
/* is already enabled (same channel, same read or write operation,
/* but perhaps a different callback or context). On systems with
/* kernel-based event filters this is preferred usage, because
/* each disable and enable request would cost a system call.
/*
/* The manifest constants EVENT_NULL_CONTEXT and EVENT_NULL_TYPE
/* provide convenient null values.
/*
/* The callback routine has the following arguments:
/* .IP fd
/* The stream on which the event happened.
/* .IP event
/* An indication of the event type:
/* .RS
/* .IP EVENT_READ
/* read event,
/* .IP EVENT_WRITE
/* write event,
/* .IP EVENT_XCPT
/* exception (actually, any event other than read or write).
/* .RE
/* .IP context
/* Application context given to event_enable_read() (event_enable_write()).
/* .PP
/* event_disable_readwrite() disables further I/O events on the specified
/* I/O channel. The application is allowed to cancel non-existing
/* I/O event requests.
/*
/* event_drain() repeatedly calls event_loop() until no more timer
/* events or I/O events are pending or until the time limit is reached.
/* This routine must not be called from an event_whatever() callback
/* routine. Note: this function assumes that no new I/O events
/* will be registered.
/*
/* event_fork() must be called by a child process after it is
/* created with fork(), to re-initialize event processing.
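/*
/* A minimal usage sketch (handle_read(), handle_timeout(), sockfd and
/* session are hypothetical names, not part of this interface):
/* .nf
/*
/*	event_enable_read(sockfd, handle_read, (void *) session);
/*	event_request_timer(handle_timeout, (void *) session, 30);
/*	for (;;)
/*	    event_loop(-1);
/* .fi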
/* DIAGNOSTICS
/* Panics: interface violations. Fatal errors: out of memory,
/* system call failure. Warnings: the number of available
/* file descriptors is much less than FD_SETSIZE.
/* BUGS
/* This module is based on event selection. It assumes that the
/* event_loop() routine is called frequently. This approach is
/* not suitable for applications with compute-bound loops that
/* take a significant amount of time.
/* LICENSE
/* .ad
/* .fi
/* The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/* Wietse Venema
/* IBM T.J. Watson Research
/* P.O. Box 704
/* Yorktown Heights, NY 10598, USA
/*--*/
/* System libraries. */
#include "sys_defs.h"
#include <sys/time.h> /* XXX: 44BSD uses bzero() */
#include <time.h>
#include <errno.h>
#include <unistd.h>
#include <stddef.h> /* offsetof() */
#include <string.h> /* bzero() prototype for 44BSD */
#include <limits.h> /* INT_MAX */
#ifdef USE_SYS_SELECT_H
#include <sys/select.h>
#endif
/* Application-specific. */
#include "mymalloc.h"
#include "msg.h"
#include "iostuff.h"
#include "ring.h"
#include "events.h"
#if !defined(EVENTS_STYLE)
#error "must define EVENTS_STYLE"
#endif
/*
* Traditional BSD-style select(2). Works everywhere, but has a built-in
* upper bound on the number of file descriptors, and that limit is hard to
* change on Linux. It is sometimes emulated with SYSV-style poll(2), which
* doesn't have the file descriptor limit, but unfortunately does not help
* to improve the performance of servers with lots of connections.
*/
#define EVENT_ALLOC_INCR 10
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
typedef fd_set EVENT_MASK;
#define EVENT_MASK_BYTE_COUNT(mask) sizeof(*(mask))
#define EVENT_MASK_ZERO(mask) FD_ZERO(mask)
#define EVENT_MASK_SET(fd, mask) FD_SET((fd), (mask))
#define EVENT_MASK_ISSET(fd, mask) FD_ISSET((fd), (mask))
#define EVENT_MASK_CLR(fd, mask) FD_CLR((fd), (mask))
#define EVENT_MASK_CMP(m1, m2) memcmp((m1), (m2), EVENT_MASK_BYTE_COUNT(m1))
#else
/*
* Kernel-based event filters (kqueue, /dev/poll, epoll). We use the
* following file descriptor mask structure which is expanded on the fly.
*/
typedef struct {
char *data; /* bit mask */
size_t data_len; /* data byte count */
} EVENT_MASK;
/* Bits per byte, byte in vector, bit offset in byte, bytes per set. */
#define EVENT_MASK_NBBY (8)
#define EVENT_MASK_FD_BYTE(fd, mask) \
(((unsigned char *) (mask)->data)[(fd) / EVENT_MASK_NBBY])
#define EVENT_MASK_FD_BIT(fd) (1 << ((fd) % EVENT_MASK_NBBY))
#define EVENT_MASK_BYTES_NEEDED(len) \
(((len) + (EVENT_MASK_NBBY -1)) / EVENT_MASK_NBBY)
#define EVENT_MASK_BYTE_COUNT(mask) ((mask)->data_len)
/* Memory management. */
#define EVENT_MASK_ALLOC(mask, bit_len) do { \
size_t _byte_len = EVENT_MASK_BYTES_NEEDED(bit_len); \
(mask)->data = mymalloc(_byte_len); \
memset((mask)->data, 0, _byte_len); \
(mask)->data_len = _byte_len; \
} while (0)
#define EVENT_MASK_REALLOC(mask, bit_len) do { \
size_t _byte_len = EVENT_MASK_BYTES_NEEDED(bit_len); \
size_t _old_len = (mask)->data_len; \
(mask)->data = myrealloc((mask)->data, _byte_len); \
if (_byte_len > _old_len) \
memset((mask)->data + _old_len, 0, _byte_len - _old_len); \
(mask)->data_len = _byte_len; \
} while (0)
#define EVENT_MASK_FREE(mask) myfree((mask)->data)
/* Set operations, modeled after FD_ZERO/SET/ISSET/CLR. */
#define EVENT_MASK_ZERO(mask) \
memset((mask)->data, 0, (mask)->data_len)
#define EVENT_MASK_SET(fd, mask) \
(EVENT_MASK_FD_BYTE((fd), (mask)) |= EVENT_MASK_FD_BIT(fd))
#define EVENT_MASK_ISSET(fd, mask) \
(EVENT_MASK_FD_BYTE((fd), (mask)) & EVENT_MASK_FD_BIT(fd))
#define EVENT_MASK_CLR(fd, mask) \
(EVENT_MASK_FD_BYTE((fd), (mask)) &= ~EVENT_MASK_FD_BIT(fd))
#define EVENT_MASK_CMP(m1, m2) \
memcmp((m1)->data, (m2)->data, EVENT_MASK_BYTE_COUNT(m1))
#endif
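/*
 * Illustration of the macros above: descriptor 11 lands in byte 11 / 8 = 1
 * with bit mask 1 << (11 % 8) = 0x08, so EVENT_MASK_SET(11, mask) sets bit
 * 3 of mask->data[1]. A stand-alone sketch of the intended usage, assuming
 * the dynamically-sized variant, would be:
 *
 *	EVENT_MASK mask;
 *
 *	EVENT_MASK_ALLOC(&mask, 16);
 *	EVENT_MASK_SET(11, &mask);
 *	if (EVENT_MASK_ISSET(11, &mask))
 *	    msg_info("descriptor 11 is set");
 *	EVENT_MASK_CLR(11, &mask);
 *	EVENT_MASK_FREE(&mask);
 */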
/*
* I/O events.
*/
typedef struct EVENT_FDTABLE EVENT_FDTABLE;
struct EVENT_FDTABLE {
EVENT_NOTIFY_RDWR_FN callback;
char *context;
};
static EVENT_MASK event_rmask; /* enabled read events */
static EVENT_MASK event_wmask; /* enabled write events */
static EVENT_MASK event_xmask; /* for bad news mostly */
static int event_fdlimit; /* per-process open file limit */
static EVENT_FDTABLE *event_fdtable; /* one slot per file descriptor */
static int event_fdslots; /* number of file descriptor slots */
static int event_max_fd = -1; /* highest fd number seen */
/*
* FreeBSD kqueue supports no system call to find out what descriptors are
* registered in the kernel-based filter. To implement our own sanity checks
* we maintain our own descriptor bitmask.
*
* FreeBSD kqueue does support application context pointers. Unfortunately,
* changing that information would cost a system call, and some of the
* competitors don't support application context. To keep the implementation
* simple we maintain our own table with call-back information.
*
* FreeBSD kqueue silently unregisters a descriptor from its filter when the
* descriptor is closed, so our information could get out of sync with the
* kernel. But that will never happen, because we have to meticulously
* unregister a file descriptor before it is closed, to avoid errors on
* systems that are built with EVENTS_STYLE == EVENTS_STYLE_SELECT.
*/
#if (EVENTS_STYLE == EVENTS_STYLE_KQUEUE)
#include <sys/event.h>
/*
* Some early FreeBSD implementations don't have the EV_SET macro.
*/
#ifndef EV_SET
#define EV_SET(kp, id, fi, fl, ffl, da, ud) do { \
(kp)->ident = (id); \
(kp)->filter = (fi); \
(kp)->flags = (fl); \
(kp)->fflags = (ffl); \
(kp)->data = (da); \
(kp)->udata = (ud); \
} while(0)
#endif
/*
* Macros to initialize the kernel-based filter; see event_init().
*/
static int event_kq; /* handle to event filter */
#define EVENT_REG_INIT_HANDLE(er, n) do { \
er = event_kq = kqueue(); \
} while (0)
#define EVENT_REG_INIT_TEXT "kqueue"
#define EVENT_REG_FORK_HANDLE(er, n) do { \
(void) close(event_kq); \
EVENT_REG_INIT_HANDLE(er, (n)); \
} while (0)
/*
* Macros to update the kernel-based filter; see event_enable_read(),
* event_enable_write() and event_disable_readwrite().
*/
#define EVENT_REG_FD_OP(er, fh, ev, op) do { \
struct kevent dummy; \
EV_SET(&dummy, (fh), (ev), (op), 0, 0, 0); \
(er) = kevent(event_kq, &dummy, 1, 0, 0, 0); \
} while (0)
#define EVENT_REG_ADD_OP(e, f, ev) EVENT_REG_FD_OP((e), (f), (ev), EV_ADD)
#define EVENT_REG_ADD_READ(e, f) EVENT_REG_ADD_OP((e), (f), EVFILT_READ)
#define EVENT_REG_ADD_WRITE(e, f) EVENT_REG_ADD_OP((e), (f), EVFILT_WRITE)
#define EVENT_REG_ADD_TEXT "kevent EV_ADD"
#define EVENT_REG_DEL_OP(e, f, ev) EVENT_REG_FD_OP((e), (f), (ev), EV_DELETE)
#define EVENT_REG_DEL_READ(e, f) EVENT_REG_DEL_OP((e), (f), EVFILT_READ)
#define EVENT_REG_DEL_WRITE(e, f) EVENT_REG_DEL_OP((e), (f), EVFILT_WRITE)
#define EVENT_REG_DEL_TEXT "kevent EV_DELETE"
/*
* Macros to retrieve event buffers from the kernel; see event_loop().
*/
typedef struct kevent EVENT_BUFFER;
#define EVENT_BUFFER_READ(event_count, event_buf, buflen, delay) do { \
struct timespec ts; \
struct timespec *tsp; \
if ((delay) < 0) { \
tsp = 0; \
} else { \
tsp = &ts; \
ts.tv_nsec = 0; \
ts.tv_sec = (delay); \
} \
(event_count) = kevent(event_kq, (struct kevent *) 0, 0, (event_buf), \
(buflen), (tsp)); \
} while (0)
#define EVENT_BUFFER_READ_TEXT "kevent"
/*
* Macros to process event buffers from the kernel; see event_loop().
*/
#define EVENT_GET_FD(bp) ((bp)->ident)
#define EVENT_GET_TYPE(bp) ((bp)->filter)
#define EVENT_TEST_READ(bp) (EVENT_GET_TYPE(bp) == EVFILT_READ)
#define EVENT_TEST_WRITE(bp) (EVENT_GET_TYPE(bp) == EVFILT_WRITE)
#endif
/*
* Solaris /dev/poll does not support application context, so we have to
* maintain our own. This has the benefit of avoiding an expensive system
* call just to change a call-back function or argument.
*
* Solaris /dev/poll does have a way to query if a specific descriptor is
* registered. However, we maintain a descriptor mask anyway because a) it
* avoids having to make an expensive system call to find out if something
* is registered, b) some EVENTS_STYLE_MUMBLE implementations need a
* descriptor bitmask anyway and c) we use the bitmask already to implement
* sanity checks.
*/
#if (EVENTS_STYLE == EVENTS_STYLE_DEVPOLL)
#include <sys/devpoll.h>
#include <fcntl.h>
/*
* Macros to initialize the kernel-based filter; see event_init().
*/
static int event_pollfd; /* handle to file descriptor set */
#define EVENT_REG_INIT_HANDLE(er, n) do { \
er = event_pollfd = open("/dev/poll", O_RDWR); \
if (event_pollfd >= 0) close_on_exec(event_pollfd, CLOSE_ON_EXEC); \
} while (0)
#define EVENT_REG_INIT_TEXT "open /dev/poll"
#define EVENT_REG_FORK_HANDLE(er, n) do { \
(void) close(event_pollfd); \
EVENT_REG_INIT_HANDLE(er, (n)); \
} while (0)
/*
* Macros to update the kernel-based filter; see event_enable_read(),
* event_enable_write() and event_disable_readwrite().
*/
#define EVENT_REG_FD_OP(er, fh, ev) do { \
struct pollfd dummy; \
dummy.fd = (fh); \
dummy.events = (ev); \
(er) = write(event_pollfd, (void *) &dummy, \
sizeof(dummy)) != sizeof(dummy) ? -1 : 0; \
} while (0)
#define EVENT_REG_ADD_READ(e, f) EVENT_REG_FD_OP((e), (f), POLLIN)
#define EVENT_REG_ADD_WRITE(e, f) EVENT_REG_FD_OP((e), (f), POLLOUT)
#define EVENT_REG_ADD_TEXT "write /dev/poll"
#define EVENT_REG_DEL_BOTH(e, f) EVENT_REG_FD_OP((e), (f), POLLREMOVE)
#define EVENT_REG_DEL_TEXT "write /dev/poll"
/*
* Macros to retrieve event buffers from the kernel; see event_loop().
*/
typedef struct pollfd EVENT_BUFFER;
#define EVENT_BUFFER_READ(event_count, event_buf, buflen, delay) do { \
struct dvpoll dvpoll; \
dvpoll.dp_fds = (event_buf); \
dvpoll.dp_nfds = (buflen); \
dvpoll.dp_timeout = (delay) < 0 ? -1 : (delay) * 1000; \
(event_count) = ioctl(event_pollfd, DP_POLL, &dvpoll); \
} while (0)
#define EVENT_BUFFER_READ_TEXT "ioctl DP_POLL"
/*
* Macros to process event buffers from the kernel; see event_loop().
*/
#define EVENT_GET_FD(bp) ((bp)->fd)
#define EVENT_GET_TYPE(bp) ((bp)->revents)
#define EVENT_TEST_READ(bp) (EVENT_GET_TYPE(bp) & POLLIN)
#define EVENT_TEST_WRITE(bp) (EVENT_GET_TYPE(bp) & POLLOUT)
#endif
/*
* Linux epoll supports no system call to find out what descriptors are
* registered in the kernel-based filter. To implement our own sanity checks
* we maintain our own descriptor bitmask.
*
* Linux epoll does support application context pointers. Unfortunately,
* changing that information would cost a system call, and some of the
* competitors don't support application context. To keep the implementation
* simple we maintain our own table with call-back information.
*
* Linux epoll silently unregisters a descriptor from its filter when the
* descriptor is closed, so our information could get out of sync with the
* kernel. But that will never happen, because we have to meticulously
* unregister a file descriptor before it is closed, to avoid errors on
* systems that are built with EVENTS_STYLE == EVENTS_STYLE_SELECT.
*/
#if (EVENTS_STYLE == EVENTS_STYLE_EPOLL)
#include <sys/epoll.h>
/*
* Macros to initialize the kernel-based filter; see event_init().
*/
static int event_epollfd; /* epoll handle */
#define EVENT_REG_INIT_HANDLE(er, n) do { \
er = event_epollfd = epoll_create(n); \
if (event_epollfd >= 0) close_on_exec(event_epollfd, CLOSE_ON_EXEC); \
} while (0)
#define EVENT_REG_INIT_TEXT "epoll_create"
#define EVENT_REG_FORK_HANDLE(er, n) do { \
(void) close(event_epollfd); \
EVENT_REG_INIT_HANDLE(er, (n)); \
} while (0)
/*
* Macros to update the kernel-based filter; see event_enable_read(),
* event_enable_write() and event_disable_readwrite().
*/
#define EVENT_REG_FD_OP(er, fh, ev, op) do { \
struct epoll_event dummy; \
dummy.events = (ev); \
dummy.data.fd = (fh); \
(er) = epoll_ctl(event_epollfd, (op), (fh), &dummy); \
} while (0)
#define EVENT_REG_ADD_OP(e, f, ev) EVENT_REG_FD_OP((e), (f), (ev), EPOLL_CTL_ADD)
#define EVENT_REG_ADD_READ(e, f) EVENT_REG_ADD_OP((e), (f), EPOLLIN)
#define EVENT_REG_ADD_WRITE(e, f) EVENT_REG_ADD_OP((e), (f), EPOLLOUT)
#define EVENT_REG_ADD_TEXT "epoll_ctl EPOLL_CTL_ADD"
#define EVENT_REG_DEL_OP(e, f, ev) EVENT_REG_FD_OP((e), (f), (ev), EPOLL_CTL_DEL)
#define EVENT_REG_DEL_READ(e, f) EVENT_REG_DEL_OP((e), (f), EPOLLIN)
#define EVENT_REG_DEL_WRITE(e, f) EVENT_REG_DEL_OP((e), (f), EPOLLOUT)
#define EVENT_REG_DEL_TEXT "epoll_ctl EPOLL_CTL_DEL"
/*
* Macros to retrieve event buffers from the kernel; see event_loop().
*/
typedef struct epoll_event EVENT_BUFFER;
#define EVENT_BUFFER_READ(event_count, event_buf, buflen, delay) do { \
(event_count) = epoll_wait(event_epollfd, (event_buf), (buflen), \
(delay) < 0 ? -1 : (delay) * 1000); \
} while (0)
#define EVENT_BUFFER_READ_TEXT "epoll_wait"
/*
* Macros to process event buffers from the kernel; see event_loop().
*/
#define EVENT_GET_FD(bp) ((bp)->data.fd)
#define EVENT_GET_TYPE(bp) ((bp)->events)
#define EVENT_TEST_READ(bp) (EVENT_GET_TYPE(bp) & EPOLLIN)
#define EVENT_TEST_WRITE(bp) (EVENT_GET_TYPE(bp) & EPOLLOUT)
#endif
/*
* Timer events. Timer requests are kept sorted, in a circular list. We use
* the RING abstraction, so we get to use a couple ugly macros.
*
* When a call-back function adds a timer request, we label the request with
* the event_loop() call instance that invoked the call-back. We use this to
* prevent zero-delay timer requests from running in a tight loop and
* starving I/O events.
*/
typedef struct EVENT_TIMER EVENT_TIMER;
struct EVENT_TIMER {
time_t when; /* when event is wanted */
EVENT_NOTIFY_TIME_FN callback; /* callback function */
char *context; /* callback context */
long loop_instance; /* event_loop() call instance */
RING ring; /* linkage */
};
static RING event_timer_head; /* timer queue head */
static long event_loop_instance; /* event_loop() call instance */
#define RING_TO_TIMER(r) \
((EVENT_TIMER *) ((char *) (r) - offsetof(EVENT_TIMER, ring)))
#define FOREACH_QUEUE_ENTRY(entry, head) \
for (entry = ring_succ(head); entry != (head); entry = ring_succ(entry))
#define FIRST_TIMER(head) \
(ring_succ(head) != (head) ? RING_TO_TIMER(ring_succ(head)) : 0)
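/*
 * Illustration: RING_TO_TIMER() is the usual "container of" idiom. Given a
 * RING pointer that is embedded in an EVENT_TIMER, subtracting
 * offsetof(EVENT_TIMER, ring) recovers the address of the enclosing timer.
 * FIRST_TIMER() packages the common case of looking at the queue head:
 *
 *	RING *ring;
 *	EVENT_TIMER *timer;
 *
 *	if ((ring = ring_succ(&event_timer_head)) != &event_timer_head) {
 *	    timer = RING_TO_TIMER(ring);
 *	    msg_info("next timer fires at %ld", (long) timer->when);
 *	}
 */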
/*
* Other private data structures.
*/
static time_t event_present; /* cached time of day */
#define EVENT_INIT_NEEDED() (event_present == 0)
/* event_init - set up tables and such */
static void event_init(void)
{
EVENT_FDTABLE *fdp;
int err;
if (!EVENT_INIT_NEEDED())
msg_panic("event_init: repeated call");
/*
* Initialize the file descriptor masks and the call-back table. Where
* possible we extend these data structures on the fly. With select(2)
* based implementations we can only handle FD_SETSIZE open files.
*/
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
if ((event_fdlimit = open_limit(FD_SETSIZE)) < 0)
msg_fatal("unable to determine open file limit");
#else
if ((event_fdlimit = open_limit(INT_MAX)) < 0)
msg_fatal("unable to determine open file limit");
#endif
if (event_fdlimit < FD_SETSIZE / 2 && event_fdlimit < 256)
msg_warn("could allocate space for only %d open files", event_fdlimit);
event_fdslots = EVENT_ALLOC_INCR;
event_fdtable = (EVENT_FDTABLE *)
mymalloc(sizeof(EVENT_FDTABLE) * event_fdslots);
for (fdp = event_fdtable; fdp < event_fdtable + event_fdslots; fdp++) {
fdp->callback = 0;
fdp->context = 0;
}
/*
* Initialize the I/O event request masks.
*/
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
EVENT_MASK_ZERO(&event_rmask);
EVENT_MASK_ZERO(&event_wmask);
EVENT_MASK_ZERO(&event_xmask);
#else
EVENT_MASK_ALLOC(&event_rmask, event_fdslots);
EVENT_MASK_ALLOC(&event_wmask, event_fdslots);
EVENT_MASK_ALLOC(&event_xmask, event_fdslots);
/*
* Initialize the kernel-based filter.
*/
EVENT_REG_INIT_HANDLE(err, event_fdslots);
if (err < 0)
msg_fatal("%s: %m", EVENT_REG_INIT_TEXT);
#endif
/*
* Initialize timer stuff.
*/
ring_init(&event_timer_head);
(void) time(&event_present);
/*
* Avoid an infinite initialization loop.
*/
if (EVENT_INIT_NEEDED())
msg_panic("event_init: unable to initialize");
}
/* event_extend - make room for more descriptor slots */
static void event_extend(int fd)
{
const char *myname = "event_extend";
int old_slots = event_fdslots;
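    /*
     * Growth policy: double the table while doubling still covers the
     * requested descriptor; otherwise jump straight to fd plus a small
     * margin. For example, 10 slots and fd = 25 yields 25 + 10 = 35 slots.
     */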
int new_slots = (event_fdslots > fd / 2 ?
2 * old_slots : fd + EVENT_ALLOC_INCR);
EVENT_FDTABLE *fdp;
#ifdef EVENT_REG_UPD_HANDLE
int err;
#endif
if (msg_verbose > 2)
msg_info("%s: fd %d", myname, fd);
event_fdtable = (EVENT_FDTABLE *)
myrealloc((void *) event_fdtable, sizeof(EVENT_FDTABLE) * new_slots);
event_fdslots = new_slots;
for (fdp = event_fdtable + old_slots;
fdp < event_fdtable + new_slots; fdp++) {
fdp->callback = 0;
fdp->context = 0;
}
/*
* Initialize the I/O event request masks.
*/
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
EVENT_MASK_REALLOC(&event_rmask, new_slots);
EVENT_MASK_REALLOC(&event_wmask, new_slots);
EVENT_MASK_REALLOC(&event_xmask, new_slots);
#endif
#ifdef EVENT_REG_UPD_HANDLE
EVENT_REG_UPD_HANDLE(err, new_slots);
if (err < 0)
msg_fatal("%s: %s: %m", myname, EVENT_REG_UPD_TEXT);
#endif
}
/* event_time - look up cached time of day */
time_t event_time(void)
{
if (EVENT_INIT_NEEDED())
event_init();
return (event_present);
}
/* event_drain - loop until all pending events are done */
void event_drain(int time_limit)
{
EVENT_MASK zero_mask;
time_t max_time;
if (EVENT_INIT_NEEDED())
return;
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
EVENT_MASK_ZERO(&zero_mask);
#else
EVENT_MASK_ALLOC(&zero_mask, event_fdslots);
#endif
(void) time(&event_present);
max_time = event_present + time_limit;
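    /*
     * Loop until the time limit is reached, or until no timer requests
     * remain and the I/O exception mask compares equal to the all-zero
     * mask (i.e. no I/O events are registered any more).
     */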
while (event_present < max_time
&& (event_timer_head.pred != &event_timer_head
|| EVENT_MASK_CMP(&zero_mask, &event_xmask) != 0)) {
event_loop(1);
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
if (EVENT_MASK_BYTE_COUNT(&zero_mask)
!= EVENT_MASK_BYTES_NEEDED(event_fdslots))
EVENT_MASK_REALLOC(&zero_mask, event_fdslots);
#endif
}
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
EVENT_MASK_FREE(&zero_mask);
#endif
}
/* event_fork - resume event processing after fork() */
void event_fork(void)
{
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
EVENT_FDTABLE *fdp;
int err;
int fd;
/*
* No event was ever registered, so there's nothing to be done.
*/
if (EVENT_INIT_NEEDED())
return;
/*
* Close the existing filter handle and open a new kernel-based filter.
*/
EVENT_REG_FORK_HANDLE(err, event_fdslots);
if (err < 0)
msg_fatal("%s: %m", EVENT_REG_INIT_TEXT);
/*
* Populate the new kernel-based filter with events that were registered
* in the parent process.
*/
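    /*
     * Clearing the mask bit first makes event_enable_read/write() below
     * treat the descriptor as not yet enabled, so that it is registered
     * with the newly created kernel-based filter.
     */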
for (fd = 0; fd <= event_max_fd; fd++) {
if (EVENT_MASK_ISSET(fd, &event_wmask)) {
EVENT_MASK_CLR(fd, &event_wmask);
fdp = event_fdtable + fd;
event_enable_write(fd, fdp->callback, fdp->context);
} else if (EVENT_MASK_ISSET(fd, &event_rmask)) {
EVENT_MASK_CLR(fd, &event_rmask);
fdp = event_fdtable + fd;
event_enable_read(fd, fdp->callback, fdp->context);
}
}
#endif
}
/* event_enable_read - enable read events */
void event_enable_read(int fd, EVENT_NOTIFY_RDWR_FN callback, void *context)
{
const char *myname = "event_enable_read";
EVENT_FDTABLE *fdp;
int err;
if (EVENT_INIT_NEEDED())
event_init();
/*
* Sanity checks.
*/
if (fd < 0 || fd >= event_fdlimit)
msg_panic("%s: bad file descriptor: %d", myname, fd);
if (msg_verbose > 2)
msg_info("%s: fd %d", myname, fd);
if (fd >= event_fdslots)
event_extend(fd);
/*
* Disallow mixed (i.e. read and write) requests on the same descriptor.
*/
if (EVENT_MASK_ISSET(fd, &event_wmask))
msg_panic("%s: fd %d: read/write I/O request", myname, fd);
/*
* Postfix 2.4 allows multiple event_enable_read() calls on the same
* descriptor without requiring event_disable_readwrite() calls between
* them. With kernel-based filters (kqueue, /dev/poll, epoll) it's
* wasteful to make system calls when we change only application
* call-back information. It has a noticeable effect on smtp-source
* performance.
*/
if (EVENT_MASK_ISSET(fd, &event_rmask) == 0) {
EVENT_MASK_SET(fd, &event_xmask);
EVENT_MASK_SET(fd, &event_rmask);
if (event_max_fd < fd)
event_max_fd = fd;
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
EVENT_REG_ADD_READ(err, fd);
if (err < 0)
msg_fatal("%s: %s: %m", myname, EVENT_REG_ADD_TEXT);
#endif
}
fdp = event_fdtable + fd;
if (fdp->callback != callback || fdp->context != context) {
fdp->callback = callback;
fdp->context = context;
}
}
/* event_enable_write - enable write events */
void event_enable_write(int fd, EVENT_NOTIFY_RDWR_FN callback, void *context)
{
const char *myname = "event_enable_write";
EVENT_FDTABLE *fdp;
int err;
if (EVENT_INIT_NEEDED())
event_init();
/*
* Sanity checks.
*/
if (fd < 0 || fd >= event_fdlimit)
msg_panic("%s: bad file descriptor: %d", myname, fd);
if (msg_verbose > 2)
msg_info("%s: fd %d", myname, fd);
if (fd >= event_fdslots)
event_extend(fd);
/*
* Disallow mixed (i.e. read and write) requests on the same descriptor.
*/
if (EVENT_MASK_ISSET(fd, &event_rmask))
msg_panic("%s: fd %d: read/write I/O request", myname, fd);
/*
* Postfix 2.4 allows multiple event_enable_write() calls on the same
* descriptor without requiring event_disable_readwrite() calls between
* them. With kernel-based filters (kqueue, /dev/poll, epoll) it's
* incredibly wasteful to make unregister and register system calls when
* we change only application call-back information. It has a noticeable
* effect on smtp-source performance.
*/
if (EVENT_MASK_ISSET(fd, &event_wmask) == 0) {
EVENT_MASK_SET(fd, &event_xmask);
EVENT_MASK_SET(fd, &event_wmask);
if (event_max_fd < fd)
event_max_fd = fd;
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
EVENT_REG_ADD_WRITE(err, fd);
if (err < 0)
msg_fatal("%s: %s: %m", myname, EVENT_REG_ADD_TEXT);
#endif
}
fdp = event_fdtable + fd;
if (fdp->callback != callback || fdp->context != context) {
fdp->callback = callback;
fdp->context = context;
}
}
/* event_disable_readwrite - disable request for read or write events */
void event_disable_readwrite(int fd)
{
const char *myname = "event_disable_readwrite";
EVENT_FDTABLE *fdp;
int err;
if (EVENT_INIT_NEEDED())
event_init();
/*
* Sanity checks.
*/
if (fd < 0 || fd >= event_fdlimit)
msg_panic("%s: bad file descriptor: %d", myname, fd);
if (msg_verbose > 2)
msg_info("%s: fd %d", myname, fd);
/*
* Don't complain when there is nothing to cancel. The request may have
* been canceled from another thread.
*/
if (fd >= event_fdslots)
return;
#if (EVENTS_STYLE != EVENTS_STYLE_SELECT)
#ifdef EVENT_REG_DEL_BOTH
/* XXX Can't seem to disable READ and WRITE events selectively. */
if (EVENT_MASK_ISSET(fd, &event_rmask)
|| EVENT_MASK_ISSET(fd, &event_wmask)) {
EVENT_REG_DEL_BOTH(err, fd);
if (err < 0)
msg_fatal("%s: %s: %m", myname, EVENT_REG_DEL_TEXT);
}
#else
if (EVENT_MASK_ISSET(fd, &event_rmask)) {
EVENT_REG_DEL_READ(err, fd);
if (err < 0)
msg_fatal("%s: %s: %m", myname, EVENT_REG_DEL_TEXT);
} else if (EVENT_MASK_ISSET(fd, &event_wmask)) {
EVENT_REG_DEL_WRITE(err, fd);
if (err < 0)
msg_fatal("%s: %s: %m", myname, EVENT_REG_DEL_TEXT);
}
#endif /* EVENT_REG_DEL_BOTH */
#endif /* != EVENTS_STYLE_SELECT */
EVENT_MASK_CLR(fd, &event_xmask);
EVENT_MASK_CLR(fd, &event_rmask);
EVENT_MASK_CLR(fd, &event_wmask);
fdp = event_fdtable + fd;
fdp->callback = 0;
fdp->context = 0;
}
/* event_request_timer - (re)set timer */
time_t event_request_timer(EVENT_NOTIFY_TIME_FN callback, void *context, int delay)
{
const char *myname = "event_request_timer";
RING *ring;
EVENT_TIMER *timer;
if (EVENT_INIT_NEEDED())
event_init();
/*
* Sanity checks.
*/
if (delay < 0)
msg_panic("%s: invalid delay: %d", myname, delay);
/*
* Make sure we schedule this event at the right time.
*/
time(&event_present);
/*
* See if they are resetting an existing timer request. If so, take the
* request away from the timer queue so that it can be inserted at the
* right place.
*/
FOREACH_QUEUE_ENTRY(ring, &event_timer_head) {
timer = RING_TO_TIMER(ring);
if (timer->callback == callback && timer->context == context) {
timer->when = event_present + delay;
timer->loop_instance = event_loop_instance;
ring_detach(ring);
if (msg_verbose > 2)
msg_info("%s: reset 0x%lx 0x%lx %d", myname,
(long) callback, (long) context, delay);
break;
}
}
/*
* If not found, schedule a new timer request.
*/
if (ring == &event_timer_head) {
timer = (EVENT_TIMER *) mymalloc(sizeof(EVENT_TIMER));
timer->when = event_present + delay;
timer->callback = callback;
timer->context = context;
timer->loop_instance = event_loop_instance;
if (msg_verbose > 2)
msg_info("%s: set 0x%lx 0x%lx %d", myname,
(long) callback, (long) context, delay);
}
/*
* Timer requests are kept sorted to reduce lookup overhead in the event
* loop.
*
* XXX Append the new request after existing requests for the same time
* slot. The event_loop() routine depends on this to avoid starving I/O
* events when a call-back function schedules a zero-delay timer request.
*/
FOREACH_QUEUE_ENTRY(ring, &event_timer_head) {
if (timer->when < RING_TO_TIMER(ring)->when)
break;
}
ring_prepend(ring, &timer->ring);
return (timer->when);
}
/* event_cancel_timer - cancel timer */
int event_cancel_timer(EVENT_NOTIFY_TIME_FN callback, void *context)
{
const char *myname = "event_cancel_timer";
RING *ring;
EVENT_TIMER *timer;
int time_left = -1;
if (EVENT_INIT_NEEDED())
event_init();
/*
* See if they are canceling an existing timer request. Do not complain
* when the request is not found. It might have been canceled from some
* other thread.
*/
FOREACH_QUEUE_ENTRY(ring, &event_timer_head) {
timer = RING_TO_TIMER(ring);
if (timer->callback == callback && timer->context == context) {
if ((time_left = timer->when - event_present) < 0)
time_left = 0;
ring_detach(ring);
myfree((void *) timer);
break;
}
}
if (msg_verbose > 2)
msg_info("%s: 0x%lx 0x%lx %d", myname,
(long) callback, (long) context, time_left);
return (time_left);
}
/* event_loop - wait for the next event */
void event_loop(int delay)
{
const char *myname = "event_loop";
static int nested;
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
fd_set rmask;
fd_set wmask;
fd_set xmask;
struct timeval tv;
struct timeval *tvp;
int new_max_fd;
#else
EVENT_BUFFER event_buf[100];
EVENT_BUFFER *bp;
#endif
int event_count;
EVENT_TIMER *timer;
int fd;
EVENT_FDTABLE *fdp;
int select_delay;
if (EVENT_INIT_NEEDED())
event_init();
/*
* XXX Also print the select() masks?
*/
if (msg_verbose > 2) {
RING *ring;
FOREACH_QUEUE_ENTRY(ring, &event_timer_head) {
timer = RING_TO_TIMER(ring);
msg_info("%s: time left %3d for 0x%lx 0x%lx", myname,
(int) (timer->when - event_present),
(long) timer->callback, (long) timer->context);
}
}
/*
* Find out when the next timer would go off. Timer requests are sorted.
* If any timer is scheduled, adjust the delay appropriately.
*/
if ((timer = FIRST_TIMER(&event_timer_head)) != 0) {
event_present = time((time_t *) 0);
if ((select_delay = timer->when - event_present) < 0) {
select_delay = 0;
} else if (delay >= 0 && select_delay > delay) {
select_delay = delay;
}
} else {
select_delay = delay;
}
if (msg_verbose > 2)
msg_info("event_loop: select_delay %d", select_delay);
/*
* Negative delay means: wait until something happens. Zero delay means:
* poll. Positive delay means: wait at most this long.
*/
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
if (select_delay < 0) {
tvp = 0;
} else {
tvp = &tv;
tv.tv_usec = 0;
tv.tv_sec = select_delay;
}
/*
* Pause until the next event happens. When select() has a problem, don't
* go into a tight loop. Allow select() to be interrupted due to the
* arrival of a signal.
*/
rmask = event_rmask;
wmask = event_wmask;
xmask = event_xmask;
event_count = select(event_max_fd + 1, &rmask, &wmask, &xmask, tvp);
if (event_count < 0) {
if (errno != EINTR)
msg_fatal("event_loop: select: %m");
return;
}
#else
EVENT_BUFFER_READ(event_count, event_buf,
sizeof(event_buf) / sizeof(event_buf[0]),
select_delay);
if (event_count < 0) {
if (errno != EINTR)
msg_fatal("event_loop: " EVENT_BUFFER_READ_TEXT ": %m");
return;
}
#endif
/*
* Before entering the application call-back routines, make sure we
* aren't being called from a call-back routine. Doing so would make us
* vulnerable to all kinds of race conditions.
*/
if (nested++ > 0)
msg_panic("event_loop: recursive call");
/*
* Deliver timer events. Allow the application to add/delete timer queue
* requests while it is being called back. Requests are sorted: we keep
* running over the timer request queue from the start, and stop when we
* reach the future or the list end. We also stop when we reach a timer
* request that was added by a call-back that was invoked from this
* event_loop() call instance, for reasons that are explained below.
*
* To avoid dangling pointer problems 1) we must remove a request from the
* timer queue before delivering its event to the application and 2) we
* must look up the next timer request *after* calling the application.
* The latter complicates the handling of zero-delay timer requests that
* are added by event_loop() call-back functions.
*
* XXX When a timer event call-back function adds a new timer request,
* event_request_timer() labels the request with the event_loop() call
* instance that invoked the timer event call-back. We use this instance
* label here to prevent zero-delay timer requests from running in a
* tight loop and starving I/O events. To make this solution work,
* event_request_timer() appends a new request after existing requests
* for the same time slot.
*/
event_present = time((time_t *) 0);
event_loop_instance += 1;
while ((timer = FIRST_TIMER(&event_timer_head)) != 0) {
if (timer->when > event_present)
break;
if (timer->loop_instance == event_loop_instance)
break;
ring_detach(&timer->ring); /* first this */
if (msg_verbose > 2)
msg_info("%s: timer 0x%lx 0x%lx", myname,
(long) timer->callback, (long) timer->context);
timer->callback(EVENT_TIME, timer->context); /* then this */
myfree((void *) timer);
}
/*
* Deliver I/O events. Allow the application to cancel event requests
* while it is being called back. To this end, we keep an eye on the
* contents of event_xmask, so that we deliver only events that are still
* wanted. We do not change the event request masks. It is up to the
* application to determine when a read or write is complete.
*/
#if (EVENTS_STYLE == EVENTS_STYLE_SELECT)
if (event_count > 0) {
for (new_max_fd = 0, fd = 0; fd <= event_max_fd; fd++) {
if (FD_ISSET(fd, &event_xmask)) {
new_max_fd = fd;
/* In case event_fdtable is updated. */
fdp = event_fdtable + fd;
if (FD_ISSET(fd, &xmask)) {
if (msg_verbose > 2)
msg_info("%s: exception fd=%d act=0x%lx 0x%lx", myname,
fd, (long) fdp->callback, (long) fdp->context);
fdp->callback(EVENT_XCPT, fdp->context);
} else if (FD_ISSET(fd, &wmask)) {
if (msg_verbose > 2)
msg_info("%s: write fd=%d act=0x%lx 0x%lx", myname,
fd, (long) fdp->callback, (long) fdp->context);
fdp->callback(EVENT_WRITE, fdp->context);
} else if (FD_ISSET(fd, &rmask)) {
if (msg_verbose > 2)
msg_info("%s: read fd=%d act=0x%lx 0x%lx", myname,
fd, (long) fdp->callback, (long) fdp->context);
fdp->callback(EVENT_READ, fdp->context);
}
}
}
event_max_fd = new_max_fd;
}
#else
for (bp = event_buf; bp < event_buf + event_count; bp++) {
fd = EVENT_GET_FD(bp);
if (fd < 0 || fd > event_max_fd)
msg_panic("%s: bad file descriptor: %d", myname, fd);
if (EVENT_MASK_ISSET(fd, &event_xmask)) {
fdp = event_fdtable + fd;
if (EVENT_TEST_READ(bp)) {
if (msg_verbose > 2)
msg_info("%s: read fd=%d act=0x%lx 0x%lx", myname,
fd, (long) fdp->callback, (long) fdp->context);
fdp->callback(EVENT_READ, fdp->context);
} else if (EVENT_TEST_WRITE(bp)) {
if (msg_verbose > 2)
msg_info("%s: write fd=%d act=0x%lx 0x%lx", myname,
fd, (long) fdp->callback,
(long) fdp->context);
fdp->callback(EVENT_WRITE, fdp->context);
} else {
if (msg_verbose > 2)
msg_info("%s: other fd=%d act=0x%lx 0x%lx", myname,
fd, (long) fdp->callback, (long) fdp->context);
fdp->callback(EVENT_XCPT, fdp->context);
}
}
}
#endif
nested--;
}
#ifdef TEST
/*
* Proof-of-concept test program for the event manager. Schedule a series of
* events at one-second intervals and let them happen, while echoing any
* lines read from stdin.
*/
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
/* timer_event - display event */
static void timer_event(int unused_event, void *context)
{
printf("%ld: %s\n", (long) event_present, context);
fflush(stdout);
}
/* echo - echo text received on stdin */
static void echo(int unused_event, void *unused_context)
{
char buf[BUFSIZ];
if (fgets(buf, sizeof(buf), stdin) == 0)
exit(0);
printf("Result: %s", buf);
}
/* request - request a bunch of timer events */
static void request(int unused_event, void *unused_context)
{
event_request_timer(timer_event, "3 first", 3);
event_request_timer(timer_event, "3 second", 3);
event_request_timer(timer_event, "4 first", 4);
event_request_timer(timer_event, "4 second", 4);
event_request_timer(timer_event, "2 first", 2);
event_request_timer(timer_event, "2 second", 2);
event_request_timer(timer_event, "1 first", 1);
event_request_timer(timer_event, "1 second", 1);
event_request_timer(timer_event, "0 first", 0);
event_request_timer(timer_event, "0 second", 0);
}
int main(int argc, char **argv)
{
if (argv[1])
msg_verbose = atoi(argv[1]);
event_request_timer(request, (void *) 0, 0);
event_enable_read(fileno(stdin), echo, (void *) 0);
event_drain(10);
exit(0);
}
#endif