001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.UnsupportedEncodingException; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.charset.Charset; 026import java.nio.charset.CharsetEncoder; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.nio.file.StandardOpenOption; 031import java.util.ArrayList; 032import java.util.Collections; 033import java.util.List; 034 035import org.apache.commons.io.Charsets; 036import org.apache.commons.io.IOUtils; 037import org.apache.commons.io.StandardLineSeparator; 038 039/** 040 * Reads lines in a file reversely (similar to a BufferedReader, but starting at 041 * the last line). Useful for e.g. searching in log files. 042 * 043 * @since 2.2 044 */ 045public class ReversedLinesFileReader implements Closeable { 046 047 private class FilePart { 048 private final long no; 049 050 private final byte[] data; 051 052 private byte[] leftOver; 053 054 private int currentLastBytePos; 055 056 /** 057 * ctor 058 * 059 * @param no the part number 060 * @param length its length 061 * @param leftOverOfLastFilePart remainder 062 * @throws IOException if there is a problem reading the file 063 */ 064 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 065 this.no = no; 066 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 067 this.data = new byte[dataLength]; 068 final long off = (no - 1) * blockSize; 069 070 // read data 071 if (no > 0 /* file not empty */) { 072 channel.position(off); 073 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); 074 if (countRead != length) { 075 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 076 } 077 } 078 // copy left over part into data arr 079 if (leftOverOfLastFilePart != null) { 080 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 081 } 082 this.currentLastBytePos = data.length - 1; 083 this.leftOver = null; 084 } 085 086 /** 087 * Creates the buffer containing any left over bytes. 088 */ 089 private void createLeftOver() { 090 final int lineLengthBytes = currentLastBytePos + 1; 091 if (lineLengthBytes > 0) { 092 // create left over for next block 093 leftOver = IOUtils.byteArray(lineLengthBytes); 094 System.arraycopy(data, 0, leftOver, 0, lineLengthBytes); 095 } else { 096 leftOver = null; 097 } 098 currentLastBytePos = -1; 099 } 100 101 /** 102 * Finds the new-line sequence and return its length. 103 * 104 * @param data buffer to scan 105 * @param i start offset in buffer 106 * @return length of newline sequence or 0 if none found 107 */ 108 private int getNewLineMatchByteCount(final byte[] data, final int i) { 109 for (final byte[] newLineSequence : newLineSequences) { 110 boolean match = true; 111 for (int j = newLineSequence.length - 1; j >= 0; j--) { 112 final int k = i + j - (newLineSequence.length - 1); 113 match &= k >= 0 && data[k] == newLineSequence[j]; 114 } 115 if (match) { 116 return newLineSequence.length; 117 } 118 } 119 return 0; 120 } 121 122 /** 123 * Reads a line. 124 * 125 * @return the line or null 126 */ 127 private String readLine() { 128 129 String line = null; 130 int newLineMatchByteCount; 131 132 final boolean isLastFilePart = no == 1; 133 134 int i = currentLastBytePos; 135 while (i > -1) { 136 137 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 138 // avoidNewlineSplitBuffer: for all except the last file part we 139 // take a few bytes to the next file part to avoid splitting of newlines 140 createLeftOver(); 141 break; // skip last few bytes and leave it to the next file part 142 } 143 144 // --- check for newline --- 145 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 146 final int lineStart = i + 1; 147 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 148 149 if (lineLengthBytes < 0) { 150 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes); 151 } 152 final byte[] lineData = IOUtils.byteArray(lineLengthBytes); 153 System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes); 154 155 line = new String(lineData, charset); 156 157 currentLastBytePos = i - newLineMatchByteCount; 158 break; // found line 159 } 160 161 // --- move cursor --- 162 i -= byteDecrement; 163 164 // --- end of file part handling --- 165 if (i < 0) { 166 createLeftOver(); 167 break; // end of file part 168 } 169 } 170 171 // --- last file part handling --- 172 if (isLastFilePart && leftOver != null) { 173 // there will be no line break anymore, this is the first line of the file 174 line = new String(leftOver, charset); 175 leftOver = null; 176 } 177 178 return line; 179 } 180 181 /** 182 * Handles block rollover 183 * 184 * @return the new FilePart or null 185 * @throws IOException if there was a problem reading the file 186 */ 187 private FilePart rollOver() throws IOException { 188 189 if (currentLastBytePos > -1) { 190 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 191 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 192 } 193 194 if (no > 1) { 195 return new FilePart(no - 1, blockSize, leftOver); 196 } 197 // NO 1 was the last FilePart, we're finished 198 if (leftOver != null) { 199 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 200 + new String(leftOver, charset)); 201 } 202 return null; 203 } 204 } 205 206 private static final String EMPTY_STRING = ""; 207 private static final int DEFAULT_BLOCK_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; 208 209 private final int blockSize; 210 private final Charset charset; 211 private final SeekableByteChannel channel; 212 private final long totalByteLength; 213 private final long totalBlockCount; 214 private final byte[][] newLineSequences; 215 private final int avoidNewlineSplitBufferSize; 216 private final int byteDecrement; 217 private FilePart currentFilePart; 218 private boolean trailingNewlineOfFileSkipped; 219 220 /** 221 * Creates a ReversedLinesFileReader with default block size of 4KB and the 222 * platform's default encoding. 223 * 224 * @param file the file to be read 225 * @throws IOException if an I/O error occurs. 226 * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead 227 */ 228 @Deprecated 229 public ReversedLinesFileReader(final File file) throws IOException { 230 this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset()); 231 } 232 233 /** 234 * Creates a ReversedLinesFileReader with default block size of 4KB and the 235 * specified encoding. 236 * 237 * @param file the file to be read 238 * @param charset the charset to use, null uses the default Charset. 239 * @throws IOException if an I/O error occurs. 240 * @since 2.5 241 */ 242 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 243 this(file.toPath(), charset); 244 } 245 246 /** 247 * Creates a ReversedLinesFileReader with the given block size and encoding. 248 * 249 * @param file the file to be read 250 * @param blockSize size of the internal buffer (for ideal performance this 251 * should match with the block size of the underlying file 252 * system). 253 * @param charset the encoding of the file, null uses the default Charset. 254 * @throws IOException if an I/O error occurs. 255 * @since 2.3 256 */ 257 public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException { 258 this(file.toPath(), blockSize, charset); 259 } 260 261 /** 262 * Creates a ReversedLinesFileReader with the given block size and encoding. 263 * 264 * @param file the file to be read 265 * @param blockSize size of the internal buffer (for ideal performance this 266 * should match with the block size of the underlying file 267 * system). 268 * @param charsetName the encoding of the file, null uses the default Charset. 269 * @throws IOException if an I/O error occurs 270 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 271 * {@link UnsupportedEncodingException} 272 * in version 2.2 if the 273 * encoding is not 274 * supported. 275 */ 276 public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException { 277 this(file.toPath(), blockSize, charsetName); 278 } 279 280 /** 281 * Creates a ReversedLinesFileReader with default block size of 4KB and the 282 * specified encoding. 283 * 284 * @param file the file to be read 285 * @param charset the charset to use, null uses the default Charset. 286 * @throws IOException if an I/O error occurs. 287 * @since 2.7 288 */ 289 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException { 290 this(file, DEFAULT_BLOCK_SIZE, charset); 291 } 292 293 /** 294 * Creates a ReversedLinesFileReader with the given block size and encoding. 295 * 296 * @param file the file to be read 297 * @param blockSize size of the internal buffer (for ideal performance this 298 * should match with the block size of the underlying file 299 * system). 300 * @param charset the encoding of the file, null uses the default Charset. 301 * @throws IOException if an I/O error occurs. 302 * @since 2.7 303 */ 304 public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException { 305 this.blockSize = blockSize; 306 this.charset = Charsets.toCharset(charset); 307 308 // --- check & prepare encoding --- 309 final CharsetEncoder charsetEncoder = this.charset.newEncoder(); 310 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 311 if (maxBytesPerChar == 1f) { 312 // all one byte encodings are no problem 313 byteDecrement = 1; 314 } else if (this.charset == StandardCharsets.UTF_8) { 315 // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte 316 // can never be a newline byte 317 // http://en.wikipedia.org/wiki/UTF-8 318 byteDecrement = 1; 319 } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 320 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 321 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 322 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 323 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 324 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 325 byteDecrement = 1; 326 } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) { 327 // UTF-16 new line sequences are not allowed as second tuple of four byte 328 // sequences, 329 // however byte order has to be specified 330 byteDecrement = 2; 331 } else if (this.charset == StandardCharsets.UTF_16) { 332 throw new UnsupportedEncodingException( 333 "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)"); 334 } else { 335 throw new UnsupportedEncodingException( 336 "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)"); 337 } 338 339 // NOTE: The new line sequences are matched in the order given, so it is 340 // important that \r\n is BEFORE \n 341 this.newLineSequences = new byte[][] { 342 StandardLineSeparator.CRLF.getBytes(this.charset), 343 StandardLineSeparator.LF.getBytes(this.charset), 344 StandardLineSeparator.CR.getBytes(this.charset) 345 }; 346 347 this.avoidNewlineSplitBufferSize = newLineSequences[0].length; 348 349 // Open file 350 this.channel = Files.newByteChannel(file, StandardOpenOption.READ); 351 this.totalByteLength = channel.size(); 352 int lastBlockLength = (int) (this.totalByteLength % blockSize); 353 if (lastBlockLength > 0) { 354 this.totalBlockCount = this.totalByteLength / blockSize + 1; 355 } else { 356 this.totalBlockCount = this.totalByteLength / blockSize; 357 if (this.totalByteLength > 0) { 358 lastBlockLength = blockSize; 359 } 360 } 361 this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 362 363 } 364 365 /** 366 * Creates a ReversedLinesFileReader with the given block size and encoding. 367 * 368 * @param file the file to be read 369 * @param blockSize size of the internal buffer (for ideal performance this 370 * should match with the block size of the underlying file 371 * system). 372 * @param charsetName the encoding of the file, null uses the default Charset. 373 * @throws IOException if an I/O error occurs 374 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 375 * {@link UnsupportedEncodingException} 376 * in version 2.2 if the 377 * encoding is not 378 * supported. 379 * @since 2.7 380 */ 381 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException { 382 this(file, blockSize, Charsets.toCharset(charsetName)); 383 } 384 385 /** 386 * Closes underlying resources. 387 * 388 * @throws IOException if an I/O error occurs. 389 */ 390 @Override 391 public void close() throws IOException { 392 channel.close(); 393 } 394 395 /** 396 * Returns the lines of the file from bottom to top. 397 * 398 * @return the next line or null if the start of the file is reached 399 * @throws IOException if an I/O error occurs. 400 */ 401 public String readLine() throws IOException { 402 403 String line = currentFilePart.readLine(); 404 while (line == null) { 405 currentFilePart = currentFilePart.rollOver(); 406 if (currentFilePart == null) { 407 // no more fileparts: we're done, leave line set to null 408 break; 409 } 410 line = currentFilePart.readLine(); 411 } 412 413 // aligned behavior with BufferedReader that doesn't return a last, empty line 414 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { 415 trailingNewlineOfFileSkipped = true; 416 line = readLine(); 417 } 418 419 return line; 420 } 421 422 /** 423 * Returns {@code lineCount} lines of the file from bottom to top. 424 * <p> 425 * If there are less than {@code lineCount} lines in the file, then that's what 426 * you get. 427 * </p> 428 * <p> 429 * Note: You can easily flip the result with {@link Collections#reverse(List)}. 430 * </p> 431 * 432 * @param lineCount How many lines to read. 433 * @return A new list 434 * @throws IOException if an I/O error occurs. 435 * @since 2.8.0 436 */ 437 public List<String> readLines(final int lineCount) throws IOException { 438 if (lineCount < 0) { 439 throw new IllegalArgumentException("lineCount < 0"); 440 } 441 final ArrayList<String> arrayList = new ArrayList<>(lineCount); 442 for (int i = 0; i < lineCount; i++) { 443 final String line = readLine(); 444 if (line == null) { 445 return arrayList; 446 } 447 arrayList.add(line); 448 } 449 return arrayList; 450 } 451 452 /** 453 * Returns the last {@code lineCount} lines of the file. 454 * <p> 455 * If there are less than {@code lineCount} lines in the file, then that's what 456 * you get. 457 * </p> 458 * 459 * @param lineCount How many lines to read. 460 * @return A String. 461 * @throws IOException if an I/O error occurs. 462 * @since 2.8.0 463 */ 464 public String toString(final int lineCount) throws IOException { 465 final List<String> lines = readLines(lineCount); 466 Collections.reverse(lines); 467 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator(); 468 } 469 470}