001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.filefilter;
018
019import java.io.File;
020import java.io.IOException;
021import java.io.RandomAccessFile;
022import java.io.Serializable;
023import java.nio.ByteBuffer;
024import java.nio.channels.FileChannel;
025import java.nio.charset.Charset;
026import java.nio.file.FileVisitResult;
027import java.nio.file.Files;
028import java.nio.file.Path;
029import java.nio.file.attribute.BasicFileAttributes;
030import java.util.Arrays;
031
032import org.apache.commons.io.IOUtils;
033
034/**
035 * <p>
036 * File filter for matching files containing a "magic number". A magic number
037 * is a unique series of bytes common to all files of a specific file format.
038 * For instance, all Java class files begin with the bytes
039 * {@code 0xCAFEBABE}.
040 * </p>
041 * <h2>Using Classic IO</h2>
042 * <pre>
043 * File dir = new File(".");
 * MagicNumberFileFilter javaClassFileFilter =
 *     new MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
 *       (byte) 0xBA, (byte) 0xBE});
047 * String[] javaClassFiles = dir.list(javaClassFileFilter);
048 * for (String javaClassFile : javaClassFiles) {
049 *     System.out.println(javaClassFile);
050 * }
051 * </pre>
052 *
053 * <p>
054 * Sometimes, such as in the case of TAR files, the
055 * magic number will be offset by a certain number of bytes in the file. In the
056 * case of TAR archive files, this offset is 257 bytes.
057 * </p>
058 *
059 * <pre>
060 * File dir = new File(".");
 * MagicNumberFileFilter tarFileFilter =
 *     new MagicNumberFileFilter("ustar", 257);
063 * String[] tarFiles = dir.list(tarFileFilter);
064 * for (String tarFile : tarFiles) {
065 *     System.out.println(tarFile);
066 * }
067 * </pre>
068 * <h2>Using NIO</h2>
069 * <pre>
070 * final Path dir = Paths.get("");
 * final AccumulatorPathVisitor visitor = AccumulatorPathVisitor.withLongCounters(new MagicNumberFileFilter("ustar", 257));
072 * //
073 * // Walk one dir
074 * Files.<b>walkFileTree</b>(dir, Collections.emptySet(), 1, visitor);
075 * System.out.println(visitor.getPathCounters());
076 * System.out.println(visitor.getFileList());
077 * //
078 * visitor.getPathCounters().reset();
079 * //
080 * // Walk dir tree
081 * Files.<b>walkFileTree</b>(dir, visitor);
082 * System.out.println(visitor.getPathCounters());
083 * System.out.println(visitor.getDirList());
084 * System.out.println(visitor.getFileList());
085 * </pre>
086 *
087 * @since 2.0
088 * @see FileFilterUtils#magicNumberFileFilter(byte[])
089 * @see FileFilterUtils#magicNumberFileFilter(String)
090 * @see FileFilterUtils#magicNumberFileFilter(byte[], long)
091 * @see FileFilterUtils#magicNumberFileFilter(String, long)
092 */
093public class MagicNumberFileFilter extends AbstractFileFilter implements
094        Serializable {
095
096    /**
097     * The serialization version unique identifier.
098     */
099    private static final long serialVersionUID = -547733176983104172L;
100
101    /**
102     * The magic number to compare against the file's bytes at the provided
103     * offset.
104     */
105    private final byte[] magicNumbers;
106
107    /**
108     * The offset (in bytes) within the files that the magic number's bytes
109     * should appear.
110     */
111    private final long byteOffset;
112
113    /**
114     * <p>
115     * Constructs a new MagicNumberFileFilter and associates it with the magic
116     * number to test for in files. This constructor assumes a starting offset
117     * of {@code 0}.
118     * </p>
119     *
120     * <p>
121     * It is important to note that <em>the array is not cloned</em> and that
122     * any changes to the magic number array after construction will affect the
123     * behavior of this file filter.
124     * </p>
125     *
126     * <pre>
127     * MagicNumberFileFilter javaClassFileFilter =
128     *     MagicNumberFileFilter(new byte[] {(byte) 0xCA, (byte) 0xFE,
129     *       (byte) 0xBA, (byte) 0xBE});
130     * </pre>
131     *
132     * @param magicNumber the magic number to look for in the file.
133     *
134     * @throws IllegalArgumentException if {@code magicNumber} is
135     *         {@code null}, or contains no bytes.
136     */
137    public MagicNumberFileFilter(final byte[] magicNumber) {
138        this(magicNumber, 0);
139    }
140
141    /**
142     * <p>
143     * Constructs a new MagicNumberFileFilter and associates it with the magic
144     * number to test for in files and the byte offset location in the file to
145     * to look for that magic number.
146     * </p>
147     *
148     * <pre>
149     * MagicNumberFileFilter tarFileFilter =
150     *     MagicNumberFileFilter(new byte[] {0x75, 0x73, 0x74, 0x61, 0x72}, 257);
151     * </pre>
152     *
153     * <pre>
154     * MagicNumberFileFilter javaClassFileFilter =
155     *     MagicNumberFileFilter(new byte[] {0xCA, 0xFE, 0xBA, 0xBE}, 0);
156     * </pre>
157     *
158     * @param magicNumber the magic number to look for in the file.
159     * @param offset the byte offset in the file to start comparing bytes.
160     *
161     * @throws IllegalArgumentException if {@code magicNumber} is
162     *         {@code null}, or contains no bytes, or {@code offset}
163     *         is a negative number.
164     */
165    public MagicNumberFileFilter(final byte[] magicNumber, final long offset) {
166        if (magicNumber == null) {
167            throw new IllegalArgumentException("The magic number cannot be null");
168        }
169        if (magicNumber.length == 0) {
170            throw new IllegalArgumentException("The magic number must contain at least one byte");
171        }
172        if (offset < 0) {
173            throw new IllegalArgumentException("The offset cannot be negative");
174        }
175
176        this.magicNumbers = IOUtils.byteArray(magicNumber.length);
177        System.arraycopy(magicNumber, 0, this.magicNumbers, 0, magicNumber.length);
178        this.byteOffset = offset;
179    }
180
181    /**
182     * <p>
183     * Constructs a new MagicNumberFileFilter and associates it with the magic
184     * number to test for in files. This constructor assumes a starting offset
185     * of {@code 0}.
186     * </p>
187     *
188     * Example usage:
189     * <pre>
190     * {@code
191     * MagicNumberFileFilter xmlFileFilter =
192     *     MagicNumberFileFilter("<?xml");
193     * }
194     * </pre>
195     *
196     * @param magicNumber the magic number to look for in the file.
197     *        The string is converted to bytes using the platform default charset.
198     *
199     * @throws IllegalArgumentException if {@code magicNumber} is
200     *         {@code null} or the empty String.
201     */
202    public MagicNumberFileFilter(final String magicNumber) {
203        this(magicNumber, 0);
204    }
205
206    /**
207     * <p>
208     * Constructs a new MagicNumberFileFilter and associates it with the magic
209     * number to test for in files and the byte offset location in the file to
210     * to look for that magic number.
211     * </p>
212     *
213     * <pre>
214     * MagicNumberFileFilter tarFileFilter =
215     *     MagicNumberFileFilter("ustar", 257);
216     * </pre>
217     *
218     * @param magicNumber the magic number to look for in the file.
219     *        The string is converted to bytes using the platform default charset.
220     * @param offset the byte offset in the file to start comparing bytes.
221     *
222     * @throws IllegalArgumentException if {@code magicNumber} is
223     *         {@code null} or the empty String, or {@code offset} is
224     *         a negative number.
225     */
226    public MagicNumberFileFilter(final String magicNumber, final long offset) {
227        if (magicNumber == null) {
228            throw new IllegalArgumentException("The magic number cannot be null");
229        }
230        if (magicNumber.isEmpty()) {
231            throw new IllegalArgumentException("The magic number must contain at least one byte");
232        }
233        if (offset < 0) {
234            throw new IllegalArgumentException("The offset cannot be negative");
235        }
236
237        this.magicNumbers = magicNumber.getBytes(Charset.defaultCharset()); // explicitly uses the platform default
238                                                                            // charset
239        this.byteOffset = offset;
240    }
241
242    /**
243     * <p>
244     * Accepts the provided file if the file contains the file filter's magic
245     * number at the specified offset.
246     * </p>
247     *
248     * <p>
249     * If any {@link IOException}s occur while reading the file, the file will
250     * be rejected.
251     * </p>
252     *
253     * @param file the file to accept or reject.
254     *
255     * @return {@code true} if the file contains the filter's magic number
256     *         at the specified offset, {@code false} otherwise.
257     */
258    @Override
259    public boolean accept(final File file) {
260        if (file != null && file.isFile() && file.canRead()) {
261            try {
262                try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
263                    final byte[] fileBytes = IOUtils.byteArray(this.magicNumbers.length);
264                    randomAccessFile.seek(byteOffset);
265                    final int read = randomAccessFile.read(fileBytes);
266                    if (read != magicNumbers.length) {
267                        return false;
268                    }
269                    return Arrays.equals(this.magicNumbers, fileBytes);
270                }
271            }
272            catch (final IOException ioe) {
273                // Do nothing, fall through and do not accept file
274            }
275        }
276
277        return false;
278    }
279
280    /**
281     * <p>
282     * Accepts the provided file if the file contains the file filter's magic
283     * number at the specified offset.
284     * </p>
285     *
286     * <p>
287     * If any {@link IOException}s occur while reading the file, the file will
288     * be rejected.
289     * </p>
290     * @param file the file to accept or reject.
291     *
292     * @return {@code true} if the file contains the filter's magic number
293     *         at the specified offset, {@code false} otherwise.
294     * @since 2.9.0
295     */
296    @Override
297    public FileVisitResult accept(final Path file, final BasicFileAttributes attributes) {
298        if (file != null && Files.isRegularFile(file) && Files.isReadable(file)) {
299            try {
300                try (final FileChannel fileChannel = FileChannel.open(file)) {
301                    final ByteBuffer byteBuffer = ByteBuffer.allocate(this.magicNumbers.length);
302                    final int read = fileChannel.read(byteBuffer);
303                    if (read != magicNumbers.length) {
304                        return FileVisitResult.TERMINATE;
305                    }
306                    return toFileVisitResult(Arrays.equals(this.magicNumbers, byteBuffer.array()), file);
307                }
308            }
309            catch (final IOException ioe) {
310                // Do nothing, fall through and do not accept file
311            }
312        }
313        return FileVisitResult.TERMINATE;
314    }
315
316    /**
317     * Returns a String representation of the file filter, which includes the
318     * magic number bytes and byte offset.
319     *
320     * @return a String representation of the file filter.
321     */
322    @Override
323    public String toString() {
324        final StringBuilder builder = new StringBuilder(super.toString());
325        builder.append("(");
326        builder.append(new String(magicNumbers, Charset.defaultCharset()));// TODO perhaps use hex if value is not
327                                                                           // printable
328        builder.append(",");
329        builder.append(this.byteOffset);
330        builder.append(")");
331        return builder.toString();
332    }
333}