PHP: ext/bz2 のマルチチャンク対応

http://www.voltex.jp/patches/php-bz2-multiple-chunks.20071229.patch.diff.bz2

ストリームフィルタと bzdecompress() / bzcompress() は直してません。気が向いたらやります。
(というか誰かやって)

Index: ext/bz2/bz2.c
===================================================================
RCS file: /repository/php-src/ext/bz2/bz2.c,v
retrieving revision 1.14.2.3.2.12
diff -u -r1.14.2.3.2.12 bz2.c
--- ext/bz2/bz2.c	14 Mar 2007 03:50:18 -0000	1.14.2.3.2.12
+++ ext/bz2/bz2.c	28 Dec 2007 22:50:31 -0000
@@ -126,7 +126,22 @@
 #endif
 
 struct php_bz2_stream_data_t {
-	BZFILE *bz_file;
+	struct { /* decompression state used by the read op */
+		char *buf; /* holds compressed bytes read from the inner stream; NULL until the first read allocates it */
+		size_t buf_sz; /* allocated size of buf in bytes */
+		bz_stream bzs; /* libbz2 decompressor handle */
+		int eof; /* set when BZ2_bzDecompress() returns BZ_STREAM_END; cleared when the decompressor is re-initialized */
+		int uninitialized; /* non-zero while bzs still needs BZ2_bzDecompressInit() */
+	} in;
+	struct { /* compression state used by the write, flush and close ops */
+		char *buf; /* receives compressed output before it is written to the inner stream */
+		size_t buf_sz; /* allocated size of buf in bytes */
+		bz_stream bzs; /* libbz2 compressor handle */
+		int uninitialized; /* non-zero while bzs still needs BZ2_bzCompressInit() */
+		int blk_sz; /* block size argument passed to BZ2_bzCompressInit() */
+		int work_factor; /* work factor argument passed to BZ2_bzCompressInit() */
+	} out;
+	int err; /* most recent BZ_* status recorded by the stream ops; BZ_OK when nothing failed */
 	php_stream *stream;
 };
 
@@ -136,32 +151,169 @@
 {
 	struct php_bz2_stream_data_t *self = (struct php_bz2_stream_data_t *) stream->abstract;
 	size_t ret;
-	
-	ret = BZ2_bzread(self->bz_file, buf, count);
+	size_t nbytes_read = 0;
+	/* Decompress into buf until count bytes are produced, the inner stream ends, or an error occurs. */
+	self->in.bzs.next_out = buf;
+	self->in.bzs.avail_out = count;
+
+	while (self->in.bzs.avail_out > 0) {
+		int bzerr;
+		/* Refill the staging buffer only once libbz2 has consumed everything in it. */
+		if (self->in.bzs.avail_in == 0) {
+			if (!self->in.buf) {
+				self->in.buf = emalloc(8192);
+				self->in.buf_sz = 8192;
+			}
+			nbytes_read = php_stream_read(self->stream,
+					self->in.buf + self->in.bzs.avail_in,
+					self->in.buf_sz - self->in.bzs.avail_in);
+			if (nbytes_read == 0) {
+				if (php_stream_eof(self->stream)) {
+					stream->eof = 1;
+					if (!self->in.eof) { /* input ended in the middle of a bzip2 chunk */
+						self->err = BZ_UNEXPECTED_EOF;
+						php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unexpected end of stream");
+					}
+					break;
+				} else {
+					self->err = BZ_IO_ERROR;
+					return 0;
+				}
+			}
+			self->in.bzs.next_in = self->in.buf;
+			self->in.bzs.avail_in += nbytes_read;
+		}
+
+		if (self->in.eof) { /* previous chunk finished and more input follows: restart the decompressor */
+			bzerr = BZ2_bzDecompressEnd(&self->in.bzs);
+			if (bzerr != BZ_OK) {
+				self->err = bzerr; /* record the failure so bzerror() can report it */
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to reinitialize bzip2 decompressor");
+				stream->eof = 1;
+				return 0;
+			}
+			self->in.uninitialized = 1;
+			self->in.eof = 0;
+		}
+		/* Lazily (re)create the decompressor for the first chunk and for every following one. */
+		if (self->in.uninitialized) {
+			bzerr = BZ2_bzDecompressInit(&self->in.bzs, 0, 0);
+			if (bzerr != BZ_OK) {
+				self->err = bzerr;
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to initialize bzip2 decompressor");
+				return 0; /* error! */
+			}
+			self->in.uninitialized = 0;
+		}
 
-	if (ret == 0) {
-		stream->eof = 1;
+		bzerr = BZ2_bzDecompress(&self->in.bzs);
+		if (bzerr == BZ_STREAM_END) {
+			self->in.eof = 1; /* end of one chunk; the loop restarts the decompressor if more input is available */
+		} else if (bzerr != BZ_OK) {
+			self->err = bzerr;
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to decompress %u bytes", self->in.bzs.avail_in);
+			return 0; /* error! */
+		}
 	}
 
-	return ret;
+	return self->in.bzs.next_out - buf; /* number of decompressed bytes stored in buf */
 }
 
 static size_t php_bz2iop_write(php_stream *stream, const char *buf, size_t count TSRMLS_DC)
 {
 	struct php_bz2_stream_data_t *self = (struct php_bz2_stream_data_t *) stream->abstract;
+	size_t nbytes_written;
+	size_t desired_buf_sz;
+	int bzerr;
+
+	/* Compresses count bytes from buf into the inner stream; returns bytes consumed, or 0 on failure with self->err set. */
+	desired_buf_sz = count * 2;
+	if (desired_buf_sz == 0) {
+		desired_buf_sz = 8192;
+	}
+
+	if (desired_buf_sz > self->out.buf_sz || !self->out.buf) {
+		self->out.buf = erealloc(self->out.buf, desired_buf_sz);
+		self->out.buf_sz = desired_buf_sz;
+	}
+
+	if (self->out.uninitialized) { /* the compressor is created on the first write */
+		bzerr = BZ2_bzCompressInit(&self->out.bzs,
+				self->out.blk_sz, 0, self->out.work_factor);
+		if (bzerr != BZ_OK) {
+			self->err = bzerr;
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to initialize bzip2 compressor");
+			return 0; /* error! */
+		}
+		self->out.uninitialized = 0;
+	}
 
-	return BZ2_bzwrite(self->bz_file, (char*)buf, count); 
+	self->out.bzs.next_in = (char *)buf; /* bz_stream.next_in is not const-qualified */
+	self->out.bzs.avail_in = count;
+
+	while (self->out.bzs.avail_in > 0) {
+		size_t out_sz;
+
+		/* Offer the whole scratch buffer on every pass; whatever gets produced is drained right below. */
+		self->out.bzs.next_out = self->out.buf;
+		self->out.bzs.avail_out = self->out.buf_sz;
+		bzerr = BZ2_bzCompress(&self->out.bzs, BZ_RUN);
+		if (bzerr != BZ_RUN_OK) {
+			self->err = bzerr;
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to compress %lu bytes", (unsigned long) count);
+			return 0; /* error! */
+		}
+
+		out_sz = self->out.bzs.next_out - self->out.buf;
+		if (out_sz > 0) {
+			nbytes_written = php_stream_write(self->stream,
+					self->out.buf, out_sz);
+			if (nbytes_written != out_sz) {
+				self->err = BZ_IO_ERROR;
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %lu bytes of compressed data", (unsigned long) out_sz);
+				return 0; /* error! */
+			}
+		}
+	}
+
+	return (char *)self->out.bzs.next_in - buf; /* input bytes consumed by the compressor */
 }
 
 static int php_bz2iop_close(php_stream *stream, int close_handle TSRMLS_DC)
 {
 	struct php_bz2_stream_data_t *self = (struct php_bz2_stream_data_t *)stream->abstract;
 	int ret = EOF;
-	
-	if (close_handle) {
-		BZ2_bzclose(self->bz_file);
-	}
 
+	if (!self->in.uninitialized) { /* a decompressor was set up by the read op: release it */
+		BZ2_bzDecompressEnd(&self->in.bzs);
+	}
+	if (!self->out.uninitialized) { /* a compressor was set up by the write op: finish the bzip2 stream */
+		int bzerr;
+		do { /* repeat BZ_FINISH until libbz2 reports BZ_STREAM_END, draining out.buf after each pass */
+			size_t out_sz;
+			self->out.bzs.next_out = self->out.buf;
+			self->out.bzs.avail_out = self->out.buf_sz;
+			self->out.bzs.next_in = 0; /* no new input, only pending data is emitted */
+			self->out.bzs.avail_in = 0;
+			bzerr = BZ2_bzCompress(&self->out.bzs, BZ_FINISH);
+			if (bzerr != BZ_STREAM_END && bzerr != BZ_FINISH_OK) {
+				self->err = bzerr;
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to finish bzip2 compressor");
+				break; /* give up on finishing, but still release the compressor below */
+			}
+			out_sz = self->out.bzs.next_out - self->out.buf; /* bytes produced by this pass */
+			if (out_sz > 0) {
+				size_t nbytes_written = php_stream_write(self->stream,
+						self->out.buf, out_sz);
+				if (nbytes_written != out_sz) { /* short write on the inner stream */
+					self->err = BZ_IO_ERROR;
+					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %u bytes of compressed data", out_sz);
+					break;
+				}
+			}
+		} while (bzerr != BZ_STREAM_END);
+		BZ2_bzCompressEnd(&self->out.bzs); /* NOTE(review): in.buf and out.buf are not freed anywhere in the visible hunks - confirm they are released */
+	}
 	if (self->stream) {
 		php_stream_free(self->stream, PHP_STREAM_FREE_CLOSE | (close_handle == 0 ? PHP_STREAM_FREE_PRESERVE_HANDLE : 0));
 	}
@@ -174,7 +326,35 @@
 static int php_bz2iop_flush(php_stream *stream TSRMLS_DC)
 {
 	struct php_bz2_stream_data_t *self = (struct php_bz2_stream_data_t *)stream->abstract;
-	return BZ2_bzflush(self->bz_file);
+	/* Pushes all pending compressor output to the inner stream. Returns 0 on success, 1 on failure (self->err is set). */
+	if (!self->out.uninitialized) { /* nothing to flush before the first write */
+		int bzerr;
+		do { /* BZ_FLUSH_OK means more output is pending; BZ_RUN_OK means the flush is complete */
+			size_t out_sz;
+			self->out.bzs.next_out = self->out.buf;
+			self->out.bzs.avail_out = self->out.buf_sz;
+			self->out.bzs.next_in = 0;
+			self->out.bzs.avail_in = 0;
+			bzerr = BZ2_bzCompress(&self->out.bzs, BZ_FLUSH);
+			if (bzerr != BZ_RUN_OK && bzerr != BZ_FLUSH_OK) {
+				self->err = bzerr;
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to flush bzip2 compressor");
+				return 1; /* error */
+			}
+
+			out_sz = self->out.bzs.next_out - self->out.buf;
+			if (out_sz > 0) {
+				size_t nbytes_written = php_stream_write(self->stream,
+						self->out.buf, out_sz);
+				if (nbytes_written != out_sz) {
+					self->err = BZ_IO_ERROR;
+					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %lu bytes of compressed data", (unsigned long) out_sz);
+					return 1; /* error: a short write must not be reported as a successful flush */
+				}
+			}
+		} while (bzerr != BZ_RUN_OK);
+	}
+	return 0;
 }
 /* }}} */
 
@@ -189,17 +369,36 @@
 };
 
 /* {{{ Bzip2 stream openers */
-PHP_BZ2_API php_stream *_php_stream_bz2open_from_BZFILE(BZFILE *bz, 
-														char *mode, php_stream *innerstream STREAMS_DC TSRMLS_DC)
+PHP_BZ2_API php_stream *_php_stream_bz2open_alloc(php_stream *stream, char *mode) /* wraps stream in a bzip2 stream; returns the new stream or NULL */
 {
+	php_stream *retval;
 	struct php_bz2_stream_data_t *self;
 	
 	self = emalloc(sizeof(*self));
 
-	self->stream = innerstream;
-	self->bz_file = bz;
+	self->in.buf = NULL; /* allocated by the first read */
+	self->in.buf_sz = 0;
+	self->in.eof = 0;
+	self->in.uninitialized = 1; /* decompressor is created lazily by the read op */
+	self->in.bzs.avail_in = 0; /* forces the first read to fetch input */
+	self->in.bzs.bzalloc = NULL; /* NULL allocator hooks: libbz2 uses its defaults */
+	self->in.bzs.bzfree = NULL;
+	self->in.bzs.opaque = NULL;
+	self->out.buf = NULL; /* allocated by the first write */
+	self->out.buf_sz = 0;
+	self->out.blk_sz = 4;
+	self->out.work_factor = 0;
+	self->out.uninitialized = 1; /* compressor is created lazily by the write op */
+	self->out.bzs.bzalloc = NULL;
+	self->out.bzs.bzfree = NULL;
+	self->out.bzs.opaque = NULL;
+	self->stream = stream;
+	self->err = BZ_OK;
 
-	return php_stream_alloc_rel(&php_stream_bz2io_ops, self, 0, mode);
+	retval = php_stream_alloc_rel(&php_stream_bz2io_ops, self, 0, mode);
+	if (retval)
+		retval->flags |= PHP_STREAM_FLAG_NO_BUFFER;
+	return retval; /* previously missing: callers received an indeterminate pointer */
 }
 
 PHP_BZ2_API php_stream *_php_stream_bz2open(php_stream_wrapper *wrapper,
@@ -211,7 +410,6 @@
 {
 	php_stream *retstream = NULL, *stream = NULL;
 	char *path_copy = NULL;
-	BZFILE *bz_file = NULL;
 
 	if (strncasecmp("compress.bzip2://", path, 17) == 0) {
 		path += 17;
@@ -219,57 +417,16 @@
 	if (mode[0] == '\0' || (mode[0] != 'w' && mode[0] != 'r' && mode[1] != '\0')) {
 		return NULL;
 	}
-
-#ifdef VIRTUAL_DIR
-	virtual_filepath_ex(path, &path_copy, NULL TSRMLS_CC);
-#else
-	path_copy = path;
-#endif  
-
-	if ((PG(safe_mode) && (!php_checkuid(path_copy, NULL, CHECKUID_CHECK_FILE_AND_DIR))) || php_check_open_basedir(path_copy TSRMLS_CC)) {
+	stream = php_stream_open_wrapper(path, mode, options | ENFORCE_SAFE_MODE, opened_path);
+	if (!stream) {
 		return NULL;
 	}
-	
-	/* try and open it directly first */
-	bz_file = BZ2_bzopen(path_copy, mode);
-
-	if (opened_path && bz_file) {
-		*opened_path = estrdup(path_copy);
-	}
-	path_copy = NULL;
-	
-	if (bz_file == NULL) {
-		/* that didn't work, so try and get something from the network/wrapper */
-		stream = php_stream_open_wrapper(path, mode, options | STREAM_WILL_CAST | ENFORCE_SAFE_MODE, opened_path);
-	
-		if (stream) {
-			int fd;
-			if (SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD, (void **) &fd, REPORT_ERRORS)) {
-				bz_file = BZ2_bzdopen(fd, mode);
-			}
-		}
-		/* remove the file created by php_stream_open_wrapper(), it is not needed since BZ2 functions
-		 * failed.
-		 */
-		if (opened_path && !bz_file && mode[0] == 'w') {
-			VCWD_UNLINK(*opened_path);
-		}
-	}
-	
-	if (bz_file) {
-		retstream = _php_stream_bz2open_from_BZFILE(bz_file, mode, stream STREAMS_REL_CC TSRMLS_CC);
-		if (retstream) {
-			return retstream;
-		}
-
-		BZ2_bzclose(bz_file);
-	}
-
-	if (stream) {
+	retstream = _php_stream_bz2open_alloc(stream, mode);
+	if (!retstream) {
 		php_stream_close(stream);
+		return NULL;
 	}
-
-	return NULL;
+	return retstream;
 }
 
 /* }}} */
@@ -366,7 +523,6 @@
 {
 	zval    **file,   /* The file to open */
 	        **mode;   /* The mode to open the stream with */
-	BZFILE   *bz;     /* The compressed file stream */
 	php_stream *stream = NULL;
 	
 	if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &file, &mode) == FAILURE) {
@@ -431,13 +587,7 @@
 				break;
 		}
 
-		if (FAILURE == php_stream_cast(stream, PHP_STREAM_AS_FD, (void *) &fd, REPORT_ERRORS)) {
-			RETURN_FALSE;
-		}
-		
-		bz = BZ2_bzdopen(fd, Z_STRVAL_PP(mode));
-
-		stream = php_stream_bz2open_from_BZFILE(bz, Z_STRVAL_PP(mode), stream);
+		stream = _php_stream_bz2open_alloc(stream, Z_STRVAL_PP(mode));
 	}
 
 	if (stream) {
@@ -591,7 +741,6 @@
 	zval        **bzp;     /* BZip2 Resource Pointer */
 	php_stream   *stream;
 	const char   *errstr;  /* Error string */
-	int           errnum;  /* Error number */
 	struct php_bz2_stream_data_t *self;
 	
 	if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &bzp) == FAILURE) {
@@ -605,14 +754,46 @@
 	}
 
 	self = (struct php_bz2_stream_data_t *) stream->abstract;
-	
-	/* Fetch the error information */
-	errstr = BZ2_bzerror(self->bz_file, &errnum);
+	errstr = "Unknown error"; /* fallback so errstr is never read uninitialized when err matches no case below */
+	switch (self->err) {
+		case BZ_OK:
+		case BZ_FLUSH_OK:
+		case BZ_FINISH_OK:
+			errstr = "No error";
+			break;
+		case BZ_STREAM_END:
+			errstr = "End of stream";
+			break;
+		case BZ_SEQUENCE_ERROR:
+			errstr = "Illegal calling sequence";
+			break;
+		case BZ_PARAM_ERROR:
+			errstr = "Invalid parameters";
+			break;
+		case BZ_MEM_ERROR:
+			errstr = "Insufficient memory";
+			break;
+		case BZ_DATA_ERROR:
+			errstr = "Invalid bz2 stream";
+			break;
+		case BZ_DATA_ERROR_MAGIC:
+			errstr = "No magic bytes ahead of the stream";
+			break;
+		case BZ_IO_ERROR:
+			errstr = "I/O error";
+			break;
+		case BZ_UNEXPECTED_EOF:
+			errstr = "Unexpected end of file";
+			break;
+		case BZ_CONFIG_ERROR:
+			errstr = "Configuration error";
+			break;
+	}
 	
 	/* Determine what to return */
 	switch (opt) {
 		case PHP_BZ_ERRNO:
-			RETURN_LONG(errnum);
+			RETURN_LONG(self->err);
 			break;
 		case PHP_BZ_ERRSTR:
 			RETURN_STRING((char*)errstr, 1);
@@ -620,7 +801,7 @@
 		case PHP_BZ_ERRBOTH:
 			array_init(return_value);
 		
-			add_assoc_long  (return_value, "errno",  errnum);
+			add_assoc_long  (return_value, "errno",  self->err);
 			add_assoc_string(return_value, "errstr", (char*)errstr, 1);
 			break;
 	}

追記: _php_stream_bz2open_alloc() が return してない orz 上の URL の方では直してあります。