001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2018 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.api;
021
022import java.io.BufferedReader;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileNotFoundException;
026import java.io.IOException;
027import java.io.InputStreamReader;
028import java.io.Reader;
029import java.io.StringReader;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.CodingErrorAction;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.ArrayList;
035import java.util.Arrays;
036import java.util.List;
037import java.util.regex.Matcher;
038import java.util.regex.Pattern;
039
040import com.puppycrawl.tools.checkstyle.utils.CommonUtils;
041
042/**
043 * Represents the text contents of a file of arbitrary plain text type.
044 * <p>
045 * This class will be passed to instances of class FileSetCheck by
046 * Checker.
047 * </p>
048 *
049 * @author Martin von Gagern
050 */
051public final class FileText {
052
053    /**
054     * The number of characters to read in one go.
055     */
056    private static final int READ_BUFFER_SIZE = 1024;
057
058    /**
059     * Regular expression pattern matching all line terminators.
060     */
061    private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
062
063    // For now, we always keep both full text and lines array.
064    // In the long run, however, the one passed at initialization might be
065    // enough, while the other could be lazily created when requested.
066    // This would save memory but cost CPU cycles.
067
068    /**
069     * The name of the file.
070     * {@code null} if no file name is available for whatever reason.
071     */
072    private final File file;
073
074    /**
075     * The charset used to read the file.
076     * {@code null} if the file was reconstructed from a list of lines.
077     */
078    private final Charset charset;
079
080    /**
081     * The full text contents of the file.
082     */
083    private final String fullText;
084
085    /**
086     * The lines of the file, without terminators.
087     */
088    private final String[] lines;
089
090    /**
091     * The first position of each line within the full text.
092     */
093    private int[] lineBreaks;
094
095    /**
096     * Creates a new file text representation.
097     *
098     * <p>The file will be read using the specified encoding, replacing
099     * malformed input and unmappable characters with the default
100     * replacement character.
101     *
102     * @param file the name of the file
103     * @param charsetName the encoding to use when reading the file
104     * @throws NullPointerException if the text is null
105     * @throws IOException if the file could not be read
106     */
107    public FileText(File file, String charsetName) throws IOException {
108        this.file = file;
109
110        // We use our own decoder, to be sure we have complete control
111        // about replacements.
112        final CharsetDecoder decoder;
113        try {
114            charset = Charset.forName(charsetName);
115            decoder = charset.newDecoder();
116            decoder.onMalformedInput(CodingErrorAction.REPLACE);
117            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
118        }
119        catch (final UnsupportedCharsetException ex) {
120            final String message = "Unsupported charset: " + charsetName;
121            throw new IllegalStateException(message, ex);
122        }
123
124        fullText = readFile(file, decoder);
125
126        // Use the BufferedReader to break down the lines as this
127        // is about 30% faster than using the
128        // LINE_TERMINATOR.split(fullText, -1) method
129        final BufferedReader reader = new BufferedReader(new StringReader(fullText));
130        try {
131            final ArrayList<String> textLines = new ArrayList<>();
132            while (true) {
133                final String line = reader.readLine();
134                if (line == null) {
135                    break;
136                }
137                textLines.add(line);
138            }
139            lines = textLines.toArray(new String[textLines.size()]);
140        }
141        finally {
142            CommonUtils.close(reader);
143        }
144    }
145
146    /**
147     * Copy constructor.
148     * @param fileText to make copy of
149     */
150    public FileText(FileText fileText) {
151        file = fileText.file;
152        charset = fileText.charset;
153        fullText = fileText.fullText;
154        lines = fileText.lines.clone();
155        if (fileText.lineBreaks == null) {
156            lineBreaks = null;
157        }
158        else {
159            lineBreaks = fileText.lineBreaks.clone();
160        }
161    }
162
163    /**
164     * Compatibility constructor.
165     *
166     * <p>This constructor reconstructs the text of the file by joining
167     * lines with linefeed characters. This process does not restore
168     * the original line terminators and should therefore be avoided.
169     *
170     * @param file the name of the file
171     * @param lines the lines of the text, without terminators
172     * @throws NullPointerException if the lines array is null
173     */
174    public FileText(File file, List<String> lines) {
175        final StringBuilder buf = new StringBuilder(1024);
176        for (final String line : lines) {
177            buf.append(line).append('\n');
178        }
179
180        this.file = file;
181        charset = null;
182        fullText = buf.toString();
183        this.lines = lines.toArray(new String[lines.size()]);
184    }
185
186    /**
187     * Reads file using specific decoder and returns all its content as a String.
188     * @param inputFile File to read
189     * @param decoder Charset decoder
190     * @return File's text
191     * @throws IOException Unable to open or read the file
192     */
193    private static String readFile(final File inputFile, final CharsetDecoder decoder)
194            throws IOException {
195        if (!inputFile.exists()) {
196            throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
197        }
198        final StringBuilder buf = new StringBuilder(1024);
199        final FileInputStream stream = new FileInputStream(inputFile);
200        final Reader reader = new InputStreamReader(stream, decoder);
201        try {
202            final char[] chars = new char[READ_BUFFER_SIZE];
203            while (true) {
204                final int len = reader.read(chars);
205                if (len == -1) {
206                    break;
207                }
208                buf.append(chars, 0, len);
209            }
210        }
211        finally {
212            CommonUtils.close(reader);
213        }
214        return buf.toString();
215    }
216
217    /**
218     * Get the name of the file.
219     * @return an object containing the name of the file
220     */
221    public File getFile() {
222        return file;
223    }
224
225    /**
226     * Get the character set which was used to read the file.
227     * Will be {@code null} for a file reconstructed from its lines.
228     * @return the charset used when the file was read
229     */
230    public Charset getCharset() {
231        return charset;
232    }
233
234    /**
235     * Retrieve the full text of the file.
236     * @return the full text of the file
237     */
238    public CharSequence getFullText() {
239        return fullText;
240    }
241
242    /**
243     * Returns an array of all lines.
244     * {@code text.toLinesArray()} is equivalent to
245     * {@code text.toArray(new String[text.size()])}.
246     * @return an array of all lines of the text
247     */
248    public String[] toLinesArray() {
249        return lines.clone();
250    }
251
252    /**
253     * Find positions of line breaks in the full text.
254     * @return an array giving the first positions of each line.
255     */
256    private int[] findLineBreaks() {
257        if (lineBreaks == null) {
258            final int[] lineBreakPositions = new int[size() + 1];
259            lineBreakPositions[0] = 0;
260            int lineNo = 1;
261            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
262            while (matcher.find()) {
263                lineBreakPositions[lineNo] = matcher.end();
264                lineNo++;
265            }
266            if (lineNo < lineBreakPositions.length) {
267                lineBreakPositions[lineNo] = fullText.length();
268            }
269            lineBreaks = lineBreakPositions;
270        }
271        return lineBreaks;
272    }
273
274    /**
275     * Determine line and column numbers in full text.
276     * @param pos the character position in the full text
277     * @return the line and column numbers of this character
278     */
279    public LineColumn lineColumn(int pos) {
280        final int[] lineBreakPositions = findLineBreaks();
281        int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
282        if (lineNo < 0) {
283            // we have: lineNo = -(insertion point) - 1
284            // we want: lineNo =  (insertion point) - 1
285            lineNo = -lineNo - 2;
286        }
287        final int startOfLine = lineBreakPositions[lineNo];
288        final int columnNo = pos - startOfLine;
289        // now we have lineNo and columnNo, both starting at zero.
290        return new LineColumn(lineNo + 1, columnNo);
291    }
292
293    /**
294     * Retrieves a line of the text by its number.
295     * The returned line will not contain a trailing terminator.
296     * @param lineNo the number of the line to get, starting at zero
297     * @return the line with the given number
298     */
299    public String get(final int lineNo) {
300        return lines[lineNo];
301    }
302
303    /**
304     * Counts the lines of the text.
305     * @return the number of lines in the text
306     */
307    public int size() {
308        return lines.length;
309    }
310
311}