/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Objects;

/**
 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
 * and transforms it to a byte stream using a specified charset encoding. The stream
 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly. In particular for charsets such as
 * UTF-16, the implementation ensures that one and only one byte order marker
 * is produced.
 * <p>
 * Since in general it is not possible to predict the number of characters to be read from the
 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
 * the {@link Reader} are buffered. There is therefore no well defined correlation
 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
 * This also implies that in general there is no need to wrap the underlying {@link Reader}
 * in a {@link java.io.BufferedReader}.
 * </p>
 * <p>
 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
 * in the following example, reading from {@code in2} would return the same byte
 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
 * with respect to the charset encoding):
 * </p>
 * <pre>
 * InputStream inputStream = ...
 * Charset cs = ...
 * InputStreamReader reader = new InputStreamReader(inputStream, cs);
 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
 * <p>
 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
 * except that the control flow is reversed: both classes transform a character stream
 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
 * while {@link ReaderInputStream} pulls it from the underlying stream.
 * </p>
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
 * where this problem may appear is when implementing the {@code javax.activation.DataSource}
 * interface from the Java Activation Framework.
 * </p>
 * <p>
 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
 * read operation will block or not, it is not possible to provide a meaningful
 * implementation of the {@link InputStream#available()} method. A call to this method
 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
 * </p>
 * <p>
 * When the end of the {@link Reader} is reached the encoder is flushed, so that charsets which
 * emit trailing bytes (for example a final escape sequence) produce complete output. If the
 * supplied {@link CharsetEncoder} is configured with {@link CodingErrorAction#REPORT}, malformed
 * or unmappable input is reported as a {@link java.nio.charset.CharacterCodingException} from
 * the {@code read} methods, once all bytes encoded before the offending input have been delivered.
 * </p>
 * <p>
 * Instances of {@link ReaderInputStream} are not thread safe.
 * </p>
 *
 * @see org.apache.commons.io.output.WriterOutputStream
 *
 * @since 2.0
 */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /**
     * Smallest usable input buffer: the encoder must be able to see both halves of a
     * surrogate pair at once, otherwise it can never make progress on a supplementary character.
     */
    private static final int MIN_BUFFER_SIZE = 2;

    /** Size in bytes of the intermediate output buffer. */
    private static final int OUTPUT_BUFFER_SIZE = 128;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller.
     */
    private final ByteBuffer encoderOut;

    /** Result of the most recent encode or flush operation; {@code null} before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying reader has reported end of stream. */
    private boolean endOfInput;

    /** Set once the encoder has been completely flushed; no further encoding may happen afterwards. */
    private boolean flushed;

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @since 2.1
     */
    public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) {
        this(reader, encoder, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @param bufferSize the size of the input buffer in number of characters; values smaller
     *        than 2 are rounded up to 2 so that a surrogate pair always fits into the buffer
     * @throws IllegalArgumentException if {@code bufferSize} is negative
     * @since 2.1
     */
    public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) {
        if (bufferSize < 0) {
            throw new IllegalArgumentException("bufferSize must not be negative: " + bufferSize);
        }
        this.reader = reader;
        this.encoder = encoder;
        // Both buffers start out empty in "read mode" (position == limit == 0).
        this.encoderIn = CharBuffer.allocate(Math.max(bufferSize, MIN_BUFFER_SIZE));
        this.encoderIn.flip();
        this.encoderOut = ByteBuffer.allocate(OUTPUT_BUFFER_SIZE);
        this.encoderOut.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream}. Malformed input and unmappable
     * characters are replaced by the charset's replacement bytes.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters; values smaller
     *        than 2 are rounded up to 2
     * @throws IllegalArgumentException if {@code bufferSize} is negative
     */
    public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) {
        this(reader,
             charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
             bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * {@value #DEFAULT_BUFFER_SIZE} characters.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(final Reader reader, final Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters; values smaller
     *        than 2 are rounded up to 2
     * @throws IllegalArgumentException if {@code bufferSize} is negative
     */
    public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * {@value #DEFAULT_BUFFER_SIZE} characters.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(final Reader reader, final String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of {@value #DEFAULT_BUFFER_SIZE} characters.
     *
     * @param reader the target {@link Reader}
     * @deprecated 2.5 use {@link #ReaderInputStream(Reader, Charset)} instead
     */
    @Deprecated
    public ReaderInputStream(final Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Fills the internal char buffer from the reader (when the encoder needs more input)
     * and encodes as much of it as fits into the internal byte buffer. Once the reader is
     * exhausted and all remaining characters are encoded, the encoder is flushed.
     *
     * @throws IOException
     *             If an I/O error occurs, or if the encoder reports malformed or unmappable
     *             input (as a {@link java.nio.charset.CharacterCodingException})
     */
    private void fillBuffer() throws IOException {
        if (flushed) {
            // The encoder has been flushed; calling encode() again would be illegal.
            return;
        }
        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            try {
                final int position = encoderIn.position();
                // We don't use Reader#read(CharBuffer) here because it is more efficient
                // to write directly to the underlying char array (the default implementation
                // copies data to a temporary char array).
                final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
                if (c == EOF) {
                    endOfInput = true;
                } else {
                    encoderIn.position(position + c);
                }
            } finally {
                // Always return to read mode, even if the reader failed, so that the
                // characters still pending in the buffer are not corrupted.
                encoderIn.flip();
            }
        }
        encoderOut.compact();
        lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
        if (endOfInput && lastCoderResult.isUnderflow()) {
            // All input has been consumed: emit any bytes the encoder still holds back.
            // On overflow the flush is retried on the next call, after the buffer is drained.
            lastCoderResult = encoder.flush(encoderOut);
            flushed = lastCoderResult.isUnderflow();
        }
        encoderOut.flip();
        if (lastCoderResult.isError() && !encoderOut.hasRemaining()) {
            // An error result consumes no input; without this the callers would spin forever.
            // Bytes encoded before the error are delivered first, hence the hasRemaining check.
            lastCoderResult.throwException();
        }
    }

    /**
     * Read the specified number of bytes into an array.
     *
     * @param array the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read or {@code -1}
     *         if the end of the stream has been reached
     * @throws NullPointerException if {@code array} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or if
     *         {@code off + len} exceeds the array length
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public int read(final byte[] array, int off, int len) throws IOException {
        Objects.requireNonNull(array, "array");
        // Written as a subtraction so that off + len cannot overflow.
        if (len < 0 || off < 0 || len > array.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + array.length +
                    ", offset=" + off + ", length=" + len);
        }
        int read = 0;
        if (len == 0) {
            return 0; // Always return 0 if len == 0
        }
        while (len > 0) {
            if (encoderOut.hasRemaining()) {
                final int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(array, off, c);
                off += c;
                len -= c;
                read += c;
            } else {
                if (read > 0 && lastCoderResult != null && lastCoderResult.isError()) {
                    // Hand over what was encoded successfully; the pending coding
                    // error is reported by the next read call.
                    break;
                }
                fillBuffer();
                if (endOfInput && !encoderOut.hasRemaining()) {
                    break;
                }
            }
        }
        return read == 0 && endOfInput ? EOF : read;
    }

    /**
     * Read the specified number of bytes into an array.
     *
     * @param b the byte array to read into
     * @return the number of bytes read or {@code -1}
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read or {@code -1} if the end of the stream
     *         has been reached
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (encoderOut.hasRemaining()) {
                return encoderOut.get() & 0xFF;
            }
            fillBuffer();
            if (endOfInput && !encoderOut.hasRemaining()) {
                return EOF;
            }
        }
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}