diff --git a/buffer.c b/buffer.c index 50908b19..8a32b7ef 100644 --- a/buffer.c +++ b/buffer.c @@ -63,6 +63,10 @@ #ifdef _EVENT_HAVE_SYS_SENDFILE_H #include #endif +#ifdef _EVENT_HAVE_SYS_STAT_H +#include +#endif + #include #include @@ -111,14 +115,6 @@ #define SENDFILE_IS_SOLARIS 1 #endif -#ifdef USE_SENDFILE -static int use_sendfile = 1; -#endif -#ifdef _EVENT_HAVE_MMAP -static int use_mmap = 1; -#endif - - /* Mask of user-selectable callback flags. */ #define EVBUFFER_CB_USER_FLAGS 0xffff /* Mask of all internal-use-only flags. */ @@ -144,13 +140,6 @@ static int evbuffer_ptr_memcmp(const struct evbuffer *buf, static struct evbuffer_chain *evbuffer_expand_singlechain(struct evbuffer *buf, size_t datlen); -#ifdef WIN32 -static int evbuffer_readfile(struct evbuffer *buf, evutil_socket_t fd, - ev_ssize_t howmuch); -#else -#define evbuffer_readfile evbuffer_read -#endif - static struct evbuffer_chain * evbuffer_chain_new(size_t size) { @@ -187,40 +176,29 @@ evbuffer_chain_free(struct evbuffer_chain *chain) chain->flags |= EVBUFFER_DANGLING; return; } - if (chain->flags & (EVBUFFER_MMAP|EVBUFFER_SENDFILE| - EVBUFFER_REFERENCE)) { - if (chain->flags & EVBUFFER_REFERENCE) { - struct evbuffer_chain_reference *info = - EVBUFFER_CHAIN_EXTRA( - struct evbuffer_chain_reference, - chain); - if (info->cleanupfn) - (*info->cleanupfn)(chain->buffer, - chain->buffer_len, - info->extra); - } -#ifdef _EVENT_HAVE_MMAP - if (chain->flags & EVBUFFER_MMAP) { - struct evbuffer_chain_fd *info = - EVBUFFER_CHAIN_EXTRA(struct evbuffer_chain_fd, - chain); - if (munmap(chain->buffer, chain->buffer_len) == -1) - event_warn("%s: munmap failed", __func__); - if (close(info->fd) == -1) - event_warn("%s: close(%d) failed", - __func__, info->fd); - } + + if (chain->flags & EVBUFFER_REFERENCE) { + struct evbuffer_chain_reference *info = + EVBUFFER_CHAIN_EXTRA( + struct evbuffer_chain_reference, + chain); + if (info->cleanupfn) + (*info->cleanupfn)(chain->buffer, + chain->buffer_len, + info->extra); 
+ } + if (chain->flags & EVBUFFER_FILESEGMENT) { + struct evbuffer_chain_file_segment *info = + EVBUFFER_CHAIN_EXTRA( + struct evbuffer_chain_file_segment, + chain); + if (info->segment) { +#ifdef WIN32 + if (info->segment->type == EVBUF_FS_MMAP) + UnmapViewOfFile(chain->buffer); #endif -#ifdef USE_SENDFILE - if (chain->flags & EVBUFFER_SENDFILE) { - struct evbuffer_chain_fd *info = - EVBUFFER_CHAIN_EXTRA(struct evbuffer_chain_fd, - chain); - if (close(info->fd) == -1) - event_warn("%s: close(%d) failed", - __func__, info->fd); + evbuffer_file_segment_free(info->segment); } -#endif } mm_free(chain); @@ -2124,56 +2102,6 @@ done: return result; } -#ifdef WIN32 -static int -evbuffer_readfile(struct evbuffer *buf, evutil_socket_t fd, ev_ssize_t howmuch) -{ - int result; - int nchains, n; - struct evbuffer_iovec v[2]; - - EVBUFFER_LOCK(buf); - - if (buf->freeze_end) { - result = -1; - goto done; - } - - if (howmuch < 0) - howmuch = 16384; - - - /* XXX we _will_ waste some space here if there is any space left - * over on buf->last. 
*/ - nchains = evbuffer_reserve_space(buf, howmuch, v, 2); - if (nchains < 1 || nchains > 2) { - result = -1; - goto done; - } - n = read((int)fd, v[0].iov_base, (unsigned int)v[0].iov_len); - if (n <= 0) { - result = n; - goto done; - } - v[0].iov_len = (IOV_LEN_TYPE) n; /* XXXX another problem with big n.*/ - if (nchains > 1) { - n = read((int)fd, v[1].iov_base, (unsigned int)v[1].iov_len); - if (n <= 0) { - result = (unsigned long) v[0].iov_len; - evbuffer_commit_space(buf, v, 1); - goto done; - } - v[1].iov_len = n; - } - evbuffer_commit_space(buf, v, nchains); - - result = n; -done: - EVBUFFER_UNLOCK(buf); - return result; -} -#endif - #ifdef USE_IOVEC_IMPL static inline int evbuffer_write_iovec(struct evbuffer *buffer, evutil_socket_t fd, @@ -2225,44 +2153,46 @@ evbuffer_write_iovec(struct evbuffer *buffer, evutil_socket_t fd, #ifdef USE_SENDFILE static inline int -evbuffer_write_sendfile(struct evbuffer *buffer, evutil_socket_t fd, +evbuffer_write_sendfile(struct evbuffer *buffer, evutil_socket_t dest_fd, ev_ssize_t howmuch) { struct evbuffer_chain *chain = buffer->first; - struct evbuffer_chain_fd *info = - EVBUFFER_CHAIN_EXTRA(struct evbuffer_chain_fd, chain); + struct evbuffer_chain_file_segment *info = + EVBUFFER_CHAIN_EXTRA(struct evbuffer_chain_file_segment, + chain); + const int source_fd = info->segment->fd; #if defined(SENDFILE_IS_MACOSX) || defined(SENDFILE_IS_FREEBSD) int res; - off_t len = chain->off; + ev_off_t len = chain->off; #elif defined(SENDFILE_IS_LINUX) || defined(SENDFILE_IS_SOLARIS) ev_ssize_t res; - off_t offset = chain->misalign; + ev_off_t offset = chain->misalign; #endif ASSERT_EVBUFFER_LOCKED(buffer); #if defined(SENDFILE_IS_MACOSX) - res = sendfile(info->fd, fd, chain->misalign, &len, NULL, 0); + res = sendfile(source_fd, dest_fd, chain->misalign, &len, NULL, 0); if (res == -1 && !EVUTIL_ERR_RW_RETRIABLE(errno)) return (-1); return (len); #elif defined(SENDFILE_IS_FREEBSD) - res = sendfile(info->fd, fd, chain->misalign, 
chain->off, NULL, &len, 0); + res = sendfile(source_fd, dest_fd, chain->misalign, chain->off, NULL, &len, 0); if (res == -1 && !EVUTIL_ERR_RW_RETRIABLE(errno)) return (-1); return (len); #elif defined(SENDFILE_IS_LINUX) /* TODO(niels): implement splice */ - res = sendfile(fd, info->fd, &offset, chain->off); + res = sendfile(dest_fd, source_fd, &offset, chain->off); if (res == -1 && EVUTIL_ERR_RW_RETRIABLE(errno)) { /* if this is EAGAIN or EINTR return 0; otherwise, -1 */ return (0); } return (res); #elif defined(SENDFILE_IS_SOLARIS) - res = sendfile(fd, info->fd, &offset, chain->off); + res = sendfile(dest_fd, source_fd, &offset, chain->off); if (res == -1 && EVUTIL_ERR_RW_RETRIABLE(errno)) { /* if this is EAGAIN or EINTR return 0; otherwise, -1 */ return (0); @@ -2654,153 +2584,286 @@ done: return result; } -/* TODO(niels): maybe we don't want to own the fd, however, in that - * case, we should dup it - dup is cheap. Perhaps, we should use a - * callback instead? - */ /* TODO(niels): we may want to add to automagically convert to mmap, in * case evbuffer_remove() or evbuffer_pullup() are being used. 
*/ -int -evbuffer_add_file(struct evbuffer *outbuf, int fd, - ev_off_t offset, ev_off_t length) +struct evbuffer_file_segment * +evbuffer_file_segment_new( + int fd, ev_off_t offset, ev_off_t length, unsigned flags) { -#if defined(USE_SENDFILE) || defined(_EVENT_HAVE_MMAP) - struct evbuffer_chain *chain; - struct evbuffer_chain_fd *info; + struct evbuffer_file_segment *seg = + mm_calloc(sizeof(struct evbuffer_file_segment), 1); + if (!seg) + return NULL; + seg->refcnt = 1; + seg->fd = fd; + seg->flags = flags; + +#ifdef WIN32 +#define lseek _lseeki64 +#define fstat _fstat +#define stat _stat #endif - int ok = 1; + if (length == -1) { + struct stat st; + if (fstat(fd, &st) < 0) + goto err; + length = st.st_size; + } + seg->length = length; #if defined(USE_SENDFILE) - if (use_sendfile) { - chain = evbuffer_chain_new(sizeof(struct evbuffer_chain_fd)); - if (chain == NULL) { - event_warn("%s: out of memory", __func__); - return (-1); - } - - chain->flags |= EVBUFFER_SENDFILE | EVBUFFER_IMMUTABLE; - chain->buffer = NULL; /* no reading possible */ - chain->buffer_len = length + offset; - chain->off = length; - chain->misalign = offset; - - info = EVBUFFER_CHAIN_EXTRA(struct evbuffer_chain_fd, chain); - info->fd = fd; - - EVBUFFER_LOCK(outbuf); - if (outbuf->freeze_end) { - mm_free(chain); - ok = 0; - } else { - outbuf->n_add_for_cb += length; - evbuffer_chain_insert(outbuf, chain); - } - } else + if (!(flags & EVBUF_FS_DISABLE_SENDFILE)) { + seg->offset = offset; + seg->type = EVBUF_FS_SENDFILE; + goto done; + } #endif #if defined(_EVENT_HAVE_MMAP) - if (use_mmap) { - void *mapped = mmap(NULL, length + offset, PROT_READ, + if (!(flags & EVBUF_FS_DISABLE_MMAP)) { + off_t offset_rounded = 0, offset_leftover = 0; + void *mapped; + if (offset) { + /* mmap implementations don't generally like us + * to have an offset that isn't a round multiple of the page size */ +#ifdef SC_PAGE_SIZE + long page_size = sysconf(SC_PAGE_SIZE); +#elif defined(_SC_PAGE_SIZE) + long page_size = sysconf(_SC_PAGE_SIZE); 
+#else + long page_size = 1; +#endif + if (page_size == -1) + goto err; + offset_leftover = offset % page_size; + offset_rounded = offset - offset_leftover; + } + mapped = mmap(NULL, length + offset_leftover, + PROT_READ, #ifdef MAP_NOCACHE - MAP_NOCACHE | + MAP_NOCACHE | /* ??? */ #endif #ifdef MAP_FILE MAP_FILE | #endif MAP_PRIVATE, - fd, 0); - /* some mmap implementations require offset to be a multiple of - * the page size. most users of this api, are likely to use 0 - * so mapping everything is not likely to be a problem. - * TODO(niels): determine page size and round offset to that - * page size to avoid mapping too much memory. - */ + fd, offset_rounded); if (mapped == MAP_FAILED) { event_warn("%s: mmap(%d, %d, %zu) failed", __func__, fd, 0, (size_t)(offset + length)); - return (-1); - } - chain = evbuffer_chain_new(sizeof(struct evbuffer_chain_fd)); - if (chain == NULL) { - event_warn("%s: out of memory", __func__); - munmap(mapped, length); - return (-1); - } - - chain->flags |= EVBUFFER_MMAP | EVBUFFER_IMMUTABLE; - chain->buffer = mapped; - chain->buffer_len = length + offset; - chain->off = length + offset; - - info = EVBUFFER_CHAIN_EXTRA(struct evbuffer_chain_fd, chain); - info->fd = fd; - - EVBUFFER_LOCK(outbuf); - if (outbuf->freeze_end) { - info->fd = -1; - evbuffer_chain_free(chain); - ok = 0; } else { - outbuf->n_add_for_cb += length; - - evbuffer_chain_insert(outbuf, chain); - - /* we need to subtract whatever we don't need */ - evbuffer_drain(outbuf, offset); - } - } else -#endif - { - /* the default implementation */ - struct evbuffer *tmp = evbuffer_new(); - ev_ssize_t read; - - if (tmp == NULL) - return (-1); - -#ifdef WIN32 -#define lseek _lseeki64 -#endif - if (lseek(fd, offset, SEEK_SET) == -1) { - evbuffer_free(tmp); - return (-1); - } - - /* we add everything to a temporary buffer, so that we - * can abort without side effects if the read fails. 
- */ - while (length) { - read = evbuffer_readfile(tmp, fd, (ev_ssize_t)length); - if (read == -1) { - evbuffer_free(tmp); - return (-1); - } - - length -= read; - } - - EVBUFFER_LOCK(outbuf); - if (outbuf->freeze_end) { - evbuffer_free(tmp); - ok = 0; - } else { - evbuffer_add_buffer(outbuf, tmp); - evbuffer_free(tmp); - -#ifdef WIN32 -#define close _close -#endif - close(fd); + seg->mapping = mapped; + seg->contents = (char*)mapped+offset_leftover; + seg->offset = 0; + seg->type = EVBUF_FS_MMAP; + goto done; } } +#endif +#ifdef WIN32 + if (!(flags & EVBUF_FS_DISABLE_MMAP)) { + long h = (long)_get_osfhandle(fd); + HANDLE m; + ev_uint64_t total_size = length+offset; + if (h == (long)INVALID_HANDLE_VALUE) + return NULL; + m = CreateFileMapping((HANDLE)h, NULL, PAGE_READONLY, + (total_size >> 32), total_size & 0xfffffffful, + NULL); + if (m != INVALID_HANDLE_VALUE) { /* Does h leak? */ + seg->mapping_handle = m; + seg->offset = offset; + seg->type = EVBUF_FS_MMAP; + goto done; + } + } +#endif - if (ok) - evbuffer_invoke_callbacks(outbuf); - EVBUFFER_UNLOCK(outbuf); + { + ev_off_t start_pos = lseek(fd, 0, SEEK_CUR), pos; + ev_off_t read_so_far = 0; + char *mem; + int e; + ev_ssize_t n = 0; + if (!(mem = mm_malloc(length))) + goto err; + if (start_pos < 0) { + mm_free(mem); + goto err; + } + if (lseek(fd, offset, SEEK_SET) < 0) { + mm_free(mem); + goto err; + } + while (read_so_far < length) { + n = read(fd, mem+read_so_far, length-read_so_far); + if (n <= 0) + break; + read_so_far += n; + } - return ok ? 
0 : -1; + e = errno; + pos = lseek(fd, start_pos, SEEK_SET); + if (n < 0 || (n == 0 && length > read_so_far)) { + mm_free(mem); + errno = e; + goto err; + } else if (pos < 0) { + mm_free(mem); + goto err; + } + + seg->contents = mem; + seg->type = EVBUF_FS_IO; + } + +done: + if (!(flags & EVBUF_FS_DISABLE_LOCKING)) { + EVTHREAD_ALLOC_LOCK(seg->lock, 0); + } + return seg; +err: + mm_free(seg); + return NULL; } +void +evbuffer_file_segment_free(struct evbuffer_file_segment *seg) +{ + int refcnt; + EVLOCK_LOCK(seg->lock, 0); + refcnt = --seg->refcnt; + EVLOCK_UNLOCK(seg->lock, 0); + if (refcnt > 0) + return; + EVUTIL_ASSERT(refcnt == 0); + + if (seg->type == EVBUF_FS_SENDFILE) { + ; + } else if (seg->type == EVBUF_FS_MMAP) { +#ifdef WIN32 + CloseHandle(seg->mapping_handle); +#elif defined (_EVENT_HAVE_MMAP) + if (munmap(seg->mapping, seg->length) == -1) + event_warn("%s: munmap failed", __func__); +#endif + } else { + EVUTIL_ASSERT(seg->type == EVBUF_FS_IO); + mm_free(seg->contents); + } + + if ((seg->flags & EVBUF_FS_CLOSE_ON_FREE) && seg->fd >= 0) { + close(seg->fd); + } + + EVTHREAD_FREE_LOCK(seg->lock, 0); + mm_free(seg); +} + +int +evbuffer_add_file_segment(struct evbuffer *buf, + struct evbuffer_file_segment *seg, ev_off_t offset, ev_off_t length) +{ + struct evbuffer_chain *chain; + struct evbuffer_chain_file_segment *extra; + + EVLOCK_LOCK(seg->lock, 0); + ++seg->refcnt; + EVLOCK_UNLOCK(seg->lock, 0); + + EVBUFFER_LOCK(buf); + + if (buf->freeze_end) + goto err; + + if (length < 0) { + if (offset > seg->length) + goto err; + length = seg->length - offset; + } + + /* Can we actually add this? 
*/ + if (offset+length > seg->length) + goto err; + + chain = evbuffer_chain_new(sizeof(struct evbuffer_chain_file_segment)); + if (!chain) + goto err; + extra = EVBUFFER_CHAIN_EXTRA(struct evbuffer_chain_file_segment, chain); + + chain->flags |= EVBUFFER_IMMUTABLE|EVBUFFER_FILESEGMENT; + if (seg->type == EVBUF_FS_SENDFILE) { + chain->flags |= EVBUFFER_SENDFILE; + chain->misalign = seg->offset + offset; + chain->off = length; + chain->buffer_len = chain->misalign + length; + } else if (seg->type == EVBUF_FS_MMAP) { +#ifdef WIN32 + ev_uint64_t total_offset = seg->offset+offset; + ev_uint64_t offset_rounded=0, offset_remaining=0; + LPVOID data; + if (total_offset) { + SYSTEM_INFO si; + memset(&si, 0, sizeof(si)); /* cargo cult */ + GetSystemInfo(&si); + offset_remaining = total_offset % si.dwAllocationGranularity; + offset_rounded = total_offset - offset_remaining; + } + data = MapViewOfFile( + seg->mapping_handle, + FILE_MAP_READ, + offset_rounded >> 32, + offset_rounded & 0xfffffffful, + length); + if (data == NULL) { + mm_free(chain); + goto err; + } + chain->buffer = (unsigned char*) data; + chain->buffer_len = length+offset_remaining; + chain->misalign = offset_remaining; + chain->off = length; +#else + chain->buffer = (unsigned char*)(seg->contents + offset); + chain->buffer_len = length; + chain->off = length; +#endif + } else { + EVUTIL_ASSERT(seg->type == EVBUF_FS_IO); + chain->buffer = (unsigned char*)(seg->contents + offset); + chain->buffer_len = length; + chain->off = length; + } + + extra->segment = seg; + buf->n_add_for_cb += length; + evbuffer_chain_insert(buf, chain); + + evbuffer_invoke_callbacks(buf); + + EVBUFFER_UNLOCK(buf); + + return 0; +err: + EVBUFFER_UNLOCK(buf); + evbuffer_file_segment_free(seg); + return -1; +} + +int +evbuffer_add_file(struct evbuffer *buf, int fd, ev_off_t offset, ev_off_t length) +{ + struct evbuffer_file_segment *seg; + unsigned flags = EVBUF_FS_CLOSE_ON_FREE; + int r; + + seg = evbuffer_file_segment_new(fd, offset, 
length, flags); + if (!seg) + return -1; + r = evbuffer_add_file_segment(buf, seg, 0, length); + evbuffer_file_segment_free(seg); + return r; +} void evbuffer_setcb(struct evbuffer *buffer, evbuffer_cb cb, void *cbarg) @@ -2936,50 +2999,3 @@ evbuffer_cb_unsuspend(struct evbuffer *buffer, struct evbuffer_cb_entry *cb) } #endif -/* These hooks are exposed so that the unit tests can temporarily disable - * sendfile support in order to test mmap, or both to test linear - * access. Don't use it; if we need to add a way to disable sendfile support - * in the future, it will probably be via an alternate version of - * evbuffer_add_file() with a 'flags' argument. - */ -int _evbuffer_testing_use_sendfile(void); -int _evbuffer_testing_use_mmap(void); -int _evbuffer_testing_use_linear_file_access(void); - -int -_evbuffer_testing_use_sendfile(void) -{ - int ok = 0; -#ifdef USE_SENDFILE - use_sendfile = 1; - ok = 1; -#endif -#ifdef _EVENT_HAVE_MMAP - use_mmap = 0; -#endif - return ok; -} -int -_evbuffer_testing_use_mmap(void) -{ - int ok = 0; -#ifdef USE_SENDFILE - use_sendfile = 0; -#endif -#ifdef _EVENT_HAVE_MMAP - use_mmap = 1; - ok = 1; -#endif - return ok; -} -int -_evbuffer_testing_use_linear_file_access(void) -{ -#ifdef USE_SENDFILE - use_sendfile = 0; -#endif -#ifdef _EVENT_HAVE_MMAP - use_mmap = 0; -#endif - return 1; -} diff --git a/evbuffer-internal.h b/evbuffer-internal.h index 7fc8b914..e5937b05 100644 --- a/evbuffer-internal.h +++ b/evbuffer-internal.h @@ -170,8 +170,8 @@ struct evbuffer_chain { /** Set if special handling is required for this chain */ unsigned flags; -#define EVBUFFER_MMAP 0x0001 /**< memory in buffer is mmaped */ -#define EVBUFFER_SENDFILE 0x0002 /**< a chain used for sendfile */ +#define EVBUFFER_FILESEGMENT 0x0001 /**< A chain used for a file segment */ +#define EVBUFFER_SENDFILE 0x0002 /**< a chain used with sendfile */ #define EVBUFFER_REFERENCE 0x0004 /**< a chain with a mem reference */ #define EVBUFFER_IMMUTABLE 0x0008 /**< read-only 
chain */ /** a chain that mustn't be reallocated or freed, or have its contents @@ -192,21 +192,53 @@ struct evbuffer_chain { unsigned char *buffer; }; -/* this is currently used by both mmap and sendfile */ -/* TODO(niels): something strange needs to happen for Windows here, I am not - * sure what that is, but it needs to get looked into. - */ -struct evbuffer_chain_fd { - int fd; /**< the fd associated with this chain */ -}; - -/** callback for a reference buffer; lets us know what to do with it when - * we're done with it. */ +/** callback for a reference chain; lets us know what to do with it when + * we're done with it. Lives at the end of an evbuffer_chain with the + * EVBUFFER_REFERENCE flag set */ struct evbuffer_chain_reference { evbuffer_ref_cleanup_cb cleanupfn; void *extra; }; +/** File segment for a file-segment chain. Lives at the end of an + * evbuffer_chain with the EVBUFFER_FILESEGMENT flag set. */ +struct evbuffer_chain_file_segment { + struct evbuffer_file_segment *segment; +#ifdef WIN32 + /** If we're using CreateFileMapping, this is the handle to the view. */ + HANDLE view_handle; +#endif +}; + +/* Declared in event2/buffer.h; defined here. */ +struct evbuffer_file_segment { + void *lock; /**< lock prevent concurrent access to refcnt */ + int refcnt; /**< Reference count for this file segment */ + unsigned flags; /**< combination of EVBUF_FS_* flags */ + + /** What kind of file segment is this? */ + enum {EVBUF_FS_MMAP, EVBUF_FS_SENDFILE, EVBUF_FS_IO} type; + + /** The fd that we read the data from. */ + int fd; + /** If we're using mmap, this is the raw mapped memory. */ + void *mapping; +#ifdef WIN32 + /** If we're using CreateFileMapping, this is the mapping */ + HANDLE mapping_handle; +#endif + /** If we're using mmap or IO, this is the content of the file + * segment. */ + char *contents; + /** If we're using mmap, this is the offset within 'mapping' where + * this data segment begins. 
If we're using sendfile, this is the + * offset within the file where this data begins. If we're using IO, + * this is 0. */ + ev_off_t offset; + /** The length of this segment. */ + ev_off_t length; +}; + #define EVBUFFER_CHAIN_SIZE sizeof(struct evbuffer_chain) /** Return a pointer to extra data allocated along with an evbuffer. */ #define EVBUFFER_CHAIN_EXTRA(t, c) (t *)((struct evbuffer_chain *)(c) + 1) diff --git a/include/event2/buffer.h b/include/event2/buffer.h index d6538071..7e2fc315 100644 --- a/include/event2/buffer.h +++ b/include/event2/buffer.h @@ -368,16 +368,117 @@ int evbuffer_add_reference(struct evbuffer *outbuf, The results of using evbuffer_remove() or evbuffer_pullup() are undefined. + For more fine-grained control, use evbuffer_add_file_segment. + @param outbuf the output buffer @param fd the file descriptor @param off the offset from which to read data - @param length how much data to read + @param length how much data to read, or -1 to read as much as possible. + (-1 requires that 'fd' support fstat.) @return 0 if successful, or -1 if an error occurred */ int evbuffer_add_file(struct evbuffer *output, int fd, ev_off_t offset, ev_off_t length); +/** + An evbuffer_file_segment holds a reference to a range of a file -- + possibly the whole file! -- for use in writing from an evbuffer to a + socket. It could be implemented with mmap, sendfile, splice, or (if all + else fails) by just pulling all the data into RAM. A single + evbuffer_file_segment can be added more than once, and to more than one + evbuffer. + */ +struct evbuffer_file_segment; + +/** + Flag for creating evbuffer_file_segment: If this flag is set, then when + the evbuffer_file_segment is freed and no longer in use by any + evbuffer, the underlying fd is closed. + */ +#define EVBUF_FS_CLOSE_ON_FREE 0x01 +/** + Flag for creating evbuffer_file_segment: Disable memory-map based + implementations. 
+ */ +#define EVBUF_FS_DISABLE_MMAP 0x02 +/** + Flag for creating evbuffer_file_segment: Disable direct fd-to-fd + implementations (including sendfile and splice). + + You might want to use this option if data needs to be taken from the + evbuffer by any means other than writing it to the network: the sendfile + backend is fast, but it only works for sending files directly to the + network. + */ +#define EVBUF_FS_DISABLE_SENDFILE 0x04 +/** + Flag for creating evbuffer_file_segment: Do not allocate a lock for this + segment. If this option is set, then neither the segment nor any + evbuffer it is added to may ever be accessed from more than one thread + at a time. + */ +#define EVBUF_FS_DISABLE_LOCKING 0x08 + +/** + Create and return a new evbuffer_file_segment for reading data from a + file and sending it out via an evbuffer. + + This function avoids unnecessary data copies between userland and + kernel. Where available, it uses sendfile or splice. + + The file descriptor must not be closed so long as any evbuffer is using + this segment. + + The results of using evbuffer_remove() or evbuffer_pullup() or any other + function that reads bytes from an evbuffer on any evbuffer containing + the newly returned segment are undefined, unless you pass the + EVBUF_FS_DISABLE_SENDFILE flag to this function. + + @param fd an open file to read from. + @param offset an index within the file at which to start reading + @param length how much data to read, or -1 to read as much as possible. + (-1 requires that 'fd' support fstat.) + @param flags any number of the EVBUF_FS_* flags + @return a new evbuffer_file_segment, or NULL on failure. + **/ +struct evbuffer_file_segment *evbuffer_file_segment_new( + int fd, ev_off_t offset, ev_off_t length, unsigned flags); + +/** + Free an evbuffer_file_segment + + It is safe to call this function even if the segment has been added to + one or more evbuffers. The evbuffer_file_segment will not be freed + until no more references to it exist. 
+ */ +void evbuffer_file_segment_free(struct evbuffer_file_segment *seg); + +/** + Insert some or all of an evbuffer_file_segment at the end of an evbuffer + + Note that the offset and length parameters of this function have a + different meaning from those provided to evbuffer_file_segment_new: When + you create the segment, the offset is the offset _within the file_, and + the length is the length _of the segment_, whereas when you add a + segment to an evbuffer, the offset is _within the segment_ and the + length is the length of the _part of the segment_ you want to use. + + In other words, if you have a 10 KiB file, and you create an + evbuffer_file_segment for it with offset 20 and length 1000, it will + refer to bytes 20..1019 inclusive. If you then pass this segment to + evbuffer_add_file_segment and specify an offset of 20 and a length of + 50, you will be adding bytes 40..89 inclusive. + + @param buf the evbuffer to append to + @param seg the segment to add + @param offset the offset within the segment to start from + @param length the amount of data to add, or -1 to add it all. + @return 0 on success, -1 on failure. + */ +int evbuffer_add_file_segment(struct evbuffer *buf, + struct evbuffer_file_segment *seg, ev_off_t offset, ev_off_t length); + /** Append a formatted string to the end of an evbuffer. 
diff --git a/test/regress.h b/test/regress.h index 3cbd7cd2..b9324dcb 100644 --- a/test/regress.h +++ b/test/regress.h @@ -62,7 +62,7 @@ extern int called; extern struct event_base *global_base; extern int in_legacy_test_wrapper; -int regress_make_tmpfile(const void *data, size_t datalen); +int regress_make_tmpfile(const void *data, size_t datalen, char **filename_out); struct basic_test_data { struct event_base *base; diff --git a/test/regress_buffer.c b/test/regress_buffer.c index 4f4a8303..f376f31e 100644 --- a/test/regress_buffer.c +++ b/test/regress_buffer.c @@ -58,6 +58,7 @@ #include "evbuffer-internal.h" #include "log-internal.h" +#include "util-internal.h" #include "regress.h" @@ -583,41 +584,173 @@ test_evbuffer_reference(void *ptr) evbuffer_free(src); } -int _evbuffer_testing_use_sendfile(void); -int _evbuffer_testing_use_mmap(void); -int _evbuffer_testing_use_linear_file_access(void); +static struct event_base *addfile_test_event_base = NULL; +static int addfile_test_done_writing = 0; +static int addfile_test_total_written = 0; +static int addfile_test_total_read = 0; + +static void +addfile_test_writecb(evutil_socket_t fd, short what, void *arg) +{ + struct evbuffer *b = arg; + int r; + evbuffer_validate(b); + while (evbuffer_get_length(b)) { + r = evbuffer_write(b, fd); + if (r > 0) { + addfile_test_total_written += r; + TT_BLATHER(("Wrote %d/%d bytes", r, addfile_test_total_written)); + } else { + int e = evutil_socket_geterror(fd); + if (EVUTIL_ERR_RW_RETRIABLE(e)) + return; + tt_fail_perror("write"); + event_base_loopexit(addfile_test_event_base,NULL); + } + evbuffer_validate(b); + } + addfile_test_done_writing = 1; + return; +end: + event_base_loopexit(addfile_test_event_base,NULL); +} + +static void +addfile_test_readcb(evutil_socket_t fd, short what, void *arg) +{ + struct evbuffer *b = arg; + int e, r = 0; + do { + int r = evbuffer_read(b, fd, 1024); + if (r > 0) { + addfile_test_total_read += r; + TT_BLATHER(("Read %d/%d bytes", r, 
addfile_test_total_read)); + } + } while (r > 0); + if (r < 0) { + e = evutil_socket_geterror(fd); + if (! EVUTIL_ERR_RW_RETRIABLE(e)) { + tt_fail_perror("read"); + event_base_loopexit(addfile_test_event_base,NULL); + } + } + if (addfile_test_done_writing && + addfile_test_total_read >= addfile_test_total_written) { + event_base_loopexit(addfile_test_event_base,NULL); + } +} static void test_evbuffer_add_file(void *ptr) { - const char *impl = ptr; - struct evbuffer *src = evbuffer_new(); - const char *data = "this is what we add as file system data."; - size_t datalen; + struct basic_test_data *testdata = ptr; + const char *impl = testdata->setup_data; + struct evbuffer *src = evbuffer_new(), *dest = evbuffer_new(); + char *tmpfilename = NULL; + char *data = NULL; + const char *expect_data; + size_t datalen, expect_len; const char *compare; int fd = -1; + int want_type = 0; + unsigned flags = 0; + int use_segment = 1, use_bigfile = 0, map_from_offset = 0, + view_from_offset = 0; + struct evbuffer_file_segment *seg = NULL; + ev_off_t starting_offset = 0, mapping_len = -1; + ev_off_t segment_offset = 0, segment_len = -1; + struct event *rev=NULL, *wev=NULL; + struct event_base *base = testdata->base; evutil_socket_t pair[2] = {-1, -1}; - int r=0, n_written=0; - - /* Add a test for a big file. XXXX */ + /* This test is highly parameterized based on substrings of its + * argument. 
The strings are: */ tt_assert(impl); - if (!strcmp(impl, "sendfile")) { - if (!_evbuffer_testing_use_sendfile()) - tt_skip(); - TT_BLATHER(("Using sendfile-based implementaion")); - } else if (!strcmp(impl, "mmap")) { - if (!_evbuffer_testing_use_mmap()) - tt_skip(); - TT_BLATHER(("Using mmap-based implementaion")); - } else if (!strcmp(impl, "linear")) { - if (!_evbuffer_testing_use_linear_file_access()) - tt_skip(); - TT_BLATHER(("Using read-based implementaion")); + if (strstr(impl, "nosegment")) { + /* If nosegment is set, use the older evbuffer_add_file + * interface */ + use_segment = 0; + } + if (strstr(impl, "bigfile")) { + /* If bigfile is set, use a 512K file. Else use a smaller + * one. */ + use_bigfile = 1; + } + if (strstr(impl, "map_offset")) { + /* If map_offset is set, we build the file segment starting + * from a point other than byte 0 and ending somewhere other + * than the last byte. Otherwise we map the whole thing */ + map_from_offset = 1; + } + if (strstr(impl, "offset_in_segment")) { + /* If offset_in_segment is set, we add a subsection of the + * file segment starting from a point other than byte 0 of + * the segment. */ + view_from_offset = 1; + } + if (strstr(impl, "sendfile")) { + /* If sendfile is set, we try to use a sendfile/splice style + * backend. */ + flags = EVBUF_FS_DISABLE_MMAP; + want_type = EVBUF_FS_SENDFILE; + } else if (strstr(impl, "mmap")) { + /* If mmap is set, we try to use a mmap/CreateFileMapping + * style backend. */ + flags = EVBUF_FS_DISABLE_SENDFILE; + want_type = EVBUF_FS_MMAP; + } else if (strstr(impl, "linear")) { + /* If linear is set, we try to use a read-the-whole-thing + * backend. */ + flags = EVBUF_FS_DISABLE_SENDFILE|EVBUF_FS_DISABLE_MMAP; + want_type = EVBUF_FS_IO; + } else if (strstr(impl, "default")) { + /* The caller doesn't care which backend we use. */ + ; } else { + /* The caller must choose a backend. 
*/ TT_DIE(("Didn't recognize the implementation")); } + if (use_bigfile) { + unsigned int i; + datalen = 1024*512; + data = malloc(1024*512); + tt_assert(data); + for (i = 0; i < datalen; ++i) + data[i] = _evutil_weakrand(); + } else { + data = strdup("here is a relatively small string."); + tt_assert(data); + datalen = strlen(data); + } + + fd = regress_make_tmpfile(data, datalen, &tmpfilename); + + if (map_from_offset) { + starting_offset = datalen/4 + 1; + mapping_len = datalen / 2 - 1; + expect_data = data + starting_offset; + expect_len = mapping_len; + } else { + expect_data = data; + expect_len = datalen; + } + if (view_from_offset) { + tt_assert(use_segment); /* Can't do this with add_file*/ + segment_offset = expect_len / 3; + segment_len = expect_len / 2; + expect_data = expect_data + segment_offset; + expect_len = segment_len; + } + + if (use_segment) { + seg = evbuffer_file_segment_new(fd, starting_offset, + mapping_len, flags); + tt_assert(seg); + if ((int)seg->type != (int)want_type) + tt_skip(); + } + #if defined(_EVENT_HAVE_SENDFILE) && defined(__sun__) && defined(__svr4__) /* We need to use a pair of AF_INET sockets, since Solaris doesn't support sendfile() over AF_UNIX. 
*/ @@ -627,39 +760,62 @@ test_evbuffer_add_file(void *ptr) if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1) tt_abort_msg("socketpair failed"); #endif - - datalen = strlen(data); - fd = regress_make_tmpfile(data, datalen); + evutil_make_socket_nonblocking(pair[0]); + evutil_make_socket_nonblocking(pair[1]); tt_assert(fd != -1); - tt_assert(evbuffer_add_file(src, fd, 0, datalen) != -1); - - evbuffer_validate(src); - - while (evbuffer_get_length(src) && - (r = evbuffer_write(src, pair[0])) > 0) { - evbuffer_validate(src); - n_written += r; + if (use_segment) { + tt_assert(evbuffer_add_file_segment(src, seg, + segment_offset, segment_len)!=-1); + } else { + tt_assert(evbuffer_add_file(src, fd, starting_offset, + mapping_len) != -1); } - tt_int_op(r, !=, -1); - tt_int_op(n_written, ==, datalen); evbuffer_validate(src); - tt_int_op(evbuffer_read(src, pair[1], (int)strlen(data)), ==, datalen); + + addfile_test_event_base = base; + wev = event_new(base, pair[0], EV_WRITE|EV_PERSIST, + addfile_test_writecb, src); + rev = event_new(base, pair[1], EV_READ|EV_PERSIST, + addfile_test_readcb, dest); + + event_add(wev, NULL); + event_add(rev, NULL); + event_base_dispatch(base); + evbuffer_validate(src); - compare = (char *)evbuffer_pullup(src, datalen); + evbuffer_validate(dest); + + tt_assert(addfile_test_done_writing); + tt_int_op(addfile_test_total_written, ==, expect_len); + tt_int_op(addfile_test_total_read, ==, expect_len); + + compare = (char *)evbuffer_pullup(dest, expect_len); tt_assert(compare != NULL); - if (memcmp(compare, data, datalen)) + if (memcmp(compare, expect_data, expect_len)) { tt_abort_msg("Data from add_file differs."); + } - evbuffer_validate(src); + evbuffer_validate(dest); end: + if (data) + free(data); + if (seg) + evbuffer_file_segment_free(seg); + if (src) + evbuffer_free(src); + if (dest) + evbuffer_free(dest); if (pair[0] >= 0) evutil_closesocket(pair[0]); if (pair[1] >= 0) evutil_closesocket(pair[1]); - evbuffer_free(src); + if 
(tmpfilename) { + unlink(tmpfilename); + free(tmpfilename); + } } #ifndef _EVENT_DISABLE_MM_REPLACEMENT @@ -1555,13 +1711,30 @@ struct testcase_t evbuffer_testcases[] = { { "peek", test_evbuffer_peek, 0, NULL, NULL }, { "freeze_start", test_evbuffer_freeze, 0, &nil_setup, (void*)"start" }, { "freeze_end", test_evbuffer_freeze, 0, &nil_setup, (void*)"end" }, - /* TODO: need a temp file implementation for Windows */ - { "add_file_sendfile", test_evbuffer_add_file, TT_FORK, &nil_setup, - (void*)"sendfile" }, - { "add_file_mmap", test_evbuffer_add_file, TT_FORK, &nil_setup, - (void*)"mmap" }, - { "add_file_linear", test_evbuffer_add_file, TT_FORK, &nil_setup, - (void*)"linear" }, + +#define ADDFILE_TEST(name, parameters) \ + { name, test_evbuffer_add_file, TT_FORK|TT_NEED_BASE, \ + &basic_setup, (void*)(parameters) } + +#define ADDFILE_TEST_GROUP(name, parameters) \ + ADDFILE_TEST(name "_sendfile", "sendfile " parameters), \ + ADDFILE_TEST(name "_mmap", "mmap " parameters), \ + ADDFILE_TEST(name "_linear", "linear " parameters) + + ADDFILE_TEST_GROUP("add_file", ""), + ADDFILE_TEST("add_file_nosegment", "default nosegment"), + + ADDFILE_TEST_GROUP("add_big_file", "bigfile"), + ADDFILE_TEST("add_big_file_nosegment", "default nosegment bigfile"), + + ADDFILE_TEST_GROUP("add_file_offset", "bigfile map_offset"), + ADDFILE_TEST("add_file_offset_nosegment", + "default nosegment bigfile map_offset"), + + ADDFILE_TEST_GROUP("add_file_offset2", "bigfile offset_in_segment"), + + ADDFILE_TEST_GROUP("add_file_offset3", + "bigfile offset_in_segment map_offset"), END_OF_TESTCASES }; diff --git a/test/regress_main.c b/test/regress_main.c index e8b2af94..849d7e0d 100644 --- a/test/regress_main.c +++ b/test/regress_main.c @@ -106,13 +106,15 @@ static void dnslogcb(int w, const char *m) TT_BLATHER(("%s", m)); } -/* creates a temporary file with the data in it */ +/* creates a temporary file with the data in it. If *filename_out gets set, + * the caller should try to unlink it. 
*/ int -regress_make_tmpfile(const void *data, size_t datalen) +regress_make_tmpfile(const void *data, size_t datalen, char **filename_out) { #ifndef WIN32 char tmpfilename[32]; int fd; + *filename_out = NULL; strcpy(tmpfilename, "/tmp/eventtmp.XXXXXX"); fd = mkstemp(tmpfilename); if (fd == -1) @@ -147,6 +149,7 @@ regress_make_tmpfile(const void *data, size_t datalen) if (tries == 0) return (-1); written = 0; + *filename_out = strdup(tmpfilename); WriteFile(h, data, (DWORD)datalen, &written, NULL); /* Closing the fd returned by this function will indeed close h. */ return _open_osfhandle((intptr_t)h,_O_RDONLY);