001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.StandardOpenOption;
031import java.util.ArrayList;
032import java.util.Collections;
033import java.util.List;
034
035import org.apache.commons.io.Charsets;
036import org.apache.commons.io.IOUtils;
037import org.apache.commons.io.StandardLineSeparator;
038
039/**
040 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
041 * the last line). Useful for e.g. searching in log files.
042 *
043 * @since 2.2
044 */
045public class ReversedLinesFileReader implements Closeable {
046
047    private class FilePart {
048        private final long no;
049
050        private final byte[] data;
051
052        private byte[] leftOver;
053
054        private int currentLastBytePos;
055
056        /**
057         * ctor
058         *
059         * @param no                     the part number
060         * @param length                 its length
061         * @param leftOverOfLastFilePart remainder
062         * @throws IOException if there is a problem reading the file
063         */
064        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
065            this.no = no;
066            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
067            this.data = new byte[dataLength];
068            final long off = (no - 1) * blockSize;
069
070            // read data
071            if (no > 0 /* file not empty */) {
072                channel.position(off);
073                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
074                if (countRead != length) {
075                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
076                }
077            }
078            // copy left over part into data arr
079            if (leftOverOfLastFilePart != null) {
080                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
081            }
082            this.currentLastBytePos = data.length - 1;
083            this.leftOver = null;
084        }
085
086        /**
087         * Creates the buffer containing any left over bytes.
088         */
089        private void createLeftOver() {
090            final int lineLengthBytes = currentLastBytePos + 1;
091            if (lineLengthBytes > 0) {
092                // create left over for next block
093                leftOver = IOUtils.byteArray(lineLengthBytes);
094                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
095            } else {
096                leftOver = null;
097            }
098            currentLastBytePos = -1;
099        }
100
101        /**
102         * Finds the new-line sequence and return its length.
103         *
104         * @param data buffer to scan
105         * @param i    start offset in buffer
106         * @return length of newline sequence or 0 if none found
107         */
108        private int getNewLineMatchByteCount(final byte[] data, final int i) {
109            for (final byte[] newLineSequence : newLineSequences) {
110                boolean match = true;
111                for (int j = newLineSequence.length - 1; j >= 0; j--) {
112                    final int k = i + j - (newLineSequence.length - 1);
113                    match &= k >= 0 && data[k] == newLineSequence[j];
114                }
115                if (match) {
116                    return newLineSequence.length;
117                }
118            }
119            return 0;
120        }
121
122        /**
123         * Reads a line.
124         *
125         * @return the line or null
126         */
127        private String readLine() {
128
129            String line = null;
130            int newLineMatchByteCount;
131
132            final boolean isLastFilePart = no == 1;
133
134            int i = currentLastBytePos;
135            while (i > -1) {
136
137                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
138                    // avoidNewlineSplitBuffer: for all except the last file part we
139                    // take a few bytes to the next file part to avoid splitting of newlines
140                    createLeftOver();
141                    break; // skip last few bytes and leave it to the next file part
142                }
143
144                // --- check for newline ---
145                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
146                    final int lineStart = i + 1;
147                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
148
149                    if (lineLengthBytes < 0) {
150                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
151                    }
152                    final byte[] lineData = IOUtils.byteArray(lineLengthBytes);
153                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);
154
155                    line = new String(lineData, charset);
156
157                    currentLastBytePos = i - newLineMatchByteCount;
158                    break; // found line
159                }
160
161                // --- move cursor ---
162                i -= byteDecrement;
163
164                // --- end of file part handling ---
165                if (i < 0) {
166                    createLeftOver();
167                    break; // end of file part
168                }
169            }
170
171            // --- last file part handling ---
172            if (isLastFilePart && leftOver != null) {
173                // there will be no line break anymore, this is the first line of the file
174                line = new String(leftOver, charset);
175                leftOver = null;
176            }
177
178            return line;
179        }
180
181        /**
182         * Handles block rollover
183         *
184         * @return the new FilePart or null
185         * @throws IOException if there was a problem reading the file
186         */
187        private FilePart rollOver() throws IOException {
188
189            if (currentLastBytePos > -1) {
190                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
191                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
192            }
193
194            if (no > 1) {
195                return new FilePart(no - 1, blockSize, leftOver);
196            }
197            // NO 1 was the last FilePart, we're finished
198            if (leftOver != null) {
199                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
200                        + new String(leftOver, charset));
201            }
202            return null;
203        }
204    }
205
206    private static final String EMPTY_STRING = "";
207    private static final int DEFAULT_BLOCK_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
208
209    private final int blockSize;
210    private final Charset charset;
211    private final SeekableByteChannel channel;
212    private final long totalByteLength;
213    private final long totalBlockCount;
214    private final byte[][] newLineSequences;
215    private final int avoidNewlineSplitBufferSize;
216    private final int byteDecrement;
217    private FilePart currentFilePart;
218    private boolean trailingNewlineOfFileSkipped;
219
220    /**
221     * Creates a ReversedLinesFileReader with default block size of 4KB and the
222     * platform's default encoding.
223     *
224     * @param file the file to be read
225     * @throws IOException if an I/O error occurs.
226     * @deprecated 2.5 use {@link #ReversedLinesFileReader(File, Charset)} instead
227     */
228    @Deprecated
229    public ReversedLinesFileReader(final File file) throws IOException {
230        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
231    }
232
233    /**
234     * Creates a ReversedLinesFileReader with default block size of 4KB and the
235     * specified encoding.
236     *
237     * @param file    the file to be read
238     * @param charset the charset to use, null uses the default Charset.
239     * @throws IOException if an I/O error occurs.
240     * @since 2.5
241     */
242    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
243        this(file.toPath(), charset);
244    }
245
246    /**
247     * Creates a ReversedLinesFileReader with the given block size and encoding.
248     *
249     * @param file      the file to be read
250     * @param blockSize size of the internal buffer (for ideal performance this
251     *                  should match with the block size of the underlying file
252     *                  system).
253     * @param charset  the encoding of the file, null uses the default Charset.
254     * @throws IOException if an I/O error occurs.
255     * @since 2.3
256     */
257    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
258        this(file.toPath(), blockSize, charset);
259    }
260
261    /**
262     * Creates a ReversedLinesFileReader with the given block size and encoding.
263     *
264     * @param file      the file to be read
265     * @param blockSize size of the internal buffer (for ideal performance this
266     *                  should match with the block size of the underlying file
267     *                  system).
268     * @param charsetName  the encoding of the file, null uses the default Charset.
269     * @throws IOException                                  if an I/O error occurs
270     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
271     *                                                      {@link UnsupportedEncodingException}
272     *                                                      in version 2.2 if the
273     *                                                      encoding is not
274     *                                                      supported.
275     */
276    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
277        this(file.toPath(), blockSize, charsetName);
278    }
279
280    /**
281     * Creates a ReversedLinesFileReader with default block size of 4KB and the
282     * specified encoding.
283     *
284     * @param file    the file to be read
285     * @param charset the charset to use, null uses the default Charset.
286     * @throws IOException if an I/O error occurs.
287     * @since 2.7
288     */
289    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
290        this(file, DEFAULT_BLOCK_SIZE, charset);
291    }
292
293    /**
294     * Creates a ReversedLinesFileReader with the given block size and encoding.
295     *
296     * @param file      the file to be read
297     * @param blockSize size of the internal buffer (for ideal performance this
298     *                  should match with the block size of the underlying file
299     *                  system).
300     * @param charset  the encoding of the file, null uses the default Charset.
301     * @throws IOException if an I/O error occurs.
302     * @since 2.7
303     */
304    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
305        this.blockSize = blockSize;
306        this.charset = Charsets.toCharset(charset);
307
308        // --- check & prepare encoding ---
309        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
310        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
311        if (maxBytesPerChar == 1f) {
312            // all one byte encodings are no problem
313            byteDecrement = 1;
314        } else if (this.charset == StandardCharsets.UTF_8) {
315            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte
316            // can never be a newline byte
317            // http://en.wikipedia.org/wiki/UTF-8
318            byteDecrement = 1;
319        } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
320        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
321                this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
322                this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
323                this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
324                this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
325            byteDecrement = 1;
326        } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
327            // UTF-16 new line sequences are not allowed as second tuple of four byte
328            // sequences,
329            // however byte order has to be specified
330            byteDecrement = 2;
331        } else if (this.charset == StandardCharsets.UTF_16) {
332            throw new UnsupportedEncodingException(
333                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
334        } else {
335            throw new UnsupportedEncodingException(
336                    "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
337        }
338
339        // NOTE: The new line sequences are matched in the order given, so it is
340        // important that \r\n is BEFORE \n
341        this.newLineSequences = new byte[][] {
342            StandardLineSeparator.CRLF.getBytes(this.charset),
343            StandardLineSeparator.LF.getBytes(this.charset),
344            StandardLineSeparator.CR.getBytes(this.charset)
345        };
346
347        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
348
349        // Open file
350        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
351        this.totalByteLength = channel.size();
352        int lastBlockLength = (int) (this.totalByteLength % blockSize);
353        if (lastBlockLength > 0) {
354            this.totalBlockCount = this.totalByteLength / blockSize + 1;
355        } else {
356            this.totalBlockCount = this.totalByteLength / blockSize;
357            if (this.totalByteLength > 0) {
358                lastBlockLength = blockSize;
359            }
360        }
361        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
362
363    }
364
365    /**
366     * Creates a ReversedLinesFileReader with the given block size and encoding.
367     *
368     * @param file        the file to be read
369     * @param blockSize   size of the internal buffer (for ideal performance this
370     *                    should match with the block size of the underlying file
371     *                    system).
372     * @param charsetName the encoding of the file, null uses the default Charset.
373     * @throws IOException                                  if an I/O error occurs
374     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
375     *                                                      {@link UnsupportedEncodingException}
376     *                                                      in version 2.2 if the
377     *                                                      encoding is not
378     *                                                      supported.
379     * @since 2.7
380     */
381    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
382        this(file, blockSize, Charsets.toCharset(charsetName));
383    }
384
385    /**
386     * Closes underlying resources.
387     *
388     * @throws IOException if an I/O error occurs.
389     */
390    @Override
391    public void close() throws IOException {
392        channel.close();
393    }
394
395    /**
396     * Returns the lines of the file from bottom to top.
397     *
398     * @return the next line or null if the start of the file is reached
399     * @throws IOException if an I/O error occurs.
400     */
401    public String readLine() throws IOException {
402
403        String line = currentFilePart.readLine();
404        while (line == null) {
405            currentFilePart = currentFilePart.rollOver();
406            if (currentFilePart == null) {
407                // no more fileparts: we're done, leave line set to null
408                break;
409            }
410            line = currentFilePart.readLine();
411        }
412
413        // aligned behavior with BufferedReader that doesn't return a last, empty line
414        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
415            trailingNewlineOfFileSkipped = true;
416            line = readLine();
417        }
418
419        return line;
420    }
421
422    /**
423     * Returns {@code lineCount} lines of the file from bottom to top.
424     * <p>
425     * If there are less than {@code lineCount} lines in the file, then that's what
426     * you get.
427     * </p>
428     * <p>
429     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
430     * </p>
431     *
432     * @param lineCount How many lines to read.
433     * @return A new list
434     * @throws IOException if an I/O error occurs.
435     * @since 2.8.0
436     */
437    public List<String> readLines(final int lineCount) throws IOException {
438        if (lineCount < 0) {
439            throw new IllegalArgumentException("lineCount < 0");
440        }
441        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
442        for (int i = 0; i < lineCount; i++) {
443            final String line = readLine();
444            if (line == null) {
445                return arrayList;
446            }
447            arrayList.add(line);
448        }
449        return arrayList;
450    }
451
452    /**
453     * Returns the last {@code lineCount} lines of the file.
454     * <p>
455     * If there are less than {@code lineCount} lines in the file, then that's what
456     * you get.
457     * </p>
458     *
459     * @param lineCount How many lines to read.
460     * @return A String.
461     * @throws IOException if an I/O error occurs.
462     * @since 2.8.0
463     */
464    public String toString(final int lineCount) throws IOException {
465        final List<String> lines = readLines(lineCount);
466        Collections.reverse(lines);
467        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
468    }
469
470}