001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2018 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.ArrayDeque;
023import java.util.Arrays;
024import java.util.Collections;
025import java.util.Deque;
026import java.util.List;
027import java.util.Locale;
028import java.util.Set;
029import java.util.TreeSet;
030import java.util.regex.Pattern;
031import java.util.stream.Collectors;
032
033import com.puppycrawl.tools.checkstyle.JavadocDetailNodeParser;
034import com.puppycrawl.tools.checkstyle.StatelessCheck;
035import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
036import com.puppycrawl.tools.checkstyle.api.DetailAST;
037import com.puppycrawl.tools.checkstyle.api.FileContents;
038import com.puppycrawl.tools.checkstyle.api.Scope;
039import com.puppycrawl.tools.checkstyle.api.TextBlock;
040import com.puppycrawl.tools.checkstyle.api.TokenTypes;
041import com.puppycrawl.tools.checkstyle.utils.CheckUtils;
042import com.puppycrawl.tools.checkstyle.utils.CommonUtils;
043import com.puppycrawl.tools.checkstyle.utils.ScopeUtils;
044
045/**
046 * Custom Checkstyle Check to validate Javadoc.
047 *
048 * @author Chris Stillwell
049 * @author Daniel Grenner
050 * @author Travis Schneeberger
051 */
052@StatelessCheck
053public class JavadocStyleCheck
054    extends AbstractCheck {
055
056    /** Message property key for the Unclosed HTML message. */
057    public static final String MSG_JAVADOC_MISSING = "javadoc.missing";
058
059    /** Message property key for the Unclosed HTML message. */
060    public static final String MSG_EMPTY = "javadoc.empty";
061
062    /** Message property key for the Unclosed HTML message. */
063    public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
064
065    /** Message property key for the Unclosed HTML message. */
066    public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
067
068    /** Message property key for the Unclosed HTML message. */
069    public static final String MSG_UNCLOSED_HTML = JavadocDetailNodeParser.MSG_UNCLOSED_HTML_TAG;
070
071    /** Message property key for the Extra HTML message. */
072    public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
073
074    /** HTML tags that do not require a close tag. */
075    private static final Set<String> SINGLE_TAGS = Collections.unmodifiableSortedSet(
076        Arrays.stream(new String[] {"br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th", })
077            .collect(Collectors.toCollection(TreeSet::new)));
078
079    /** HTML tags that are allowed in java docs.
080     * From https://www.w3schools.com/tags/default.asp
081     * The forms and structure tags are not allowed
082     */
083    private static final Set<String> ALLOWED_TAGS = Collections.unmodifiableSortedSet(
084        Arrays.stream(new String[] {
085            "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
086            "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
087            "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
088            "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
089            "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
090            "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
091            "thead", "tr", "tt", "u", "ul", "var", })
092        .collect(Collectors.toCollection(TreeSet::new)));
093
094    /** The scope to check. */
095    private Scope scope = Scope.PRIVATE;
096
097    /** The visibility scope where Javadoc comments shouldn't be checked. **/
098    private Scope excludeScope;
099
100    /** Format for matching the end of a sentence. */
101    private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");
102
103    /**
104     * Indicates if the first sentence should be checked for proper end of
105     * sentence punctuation.
106     */
107    private boolean checkFirstSentence = true;
108
109    /**
110     * Indicates if the HTML within the comment should be checked.
111     */
112    private boolean checkHtml = true;
113
114    /**
115     * Indicates if empty javadoc statements should be checked.
116     */
117    private boolean checkEmptyJavadoc;
118
119    @Override
120    public int[] getDefaultTokens() {
121        return getAcceptableTokens();
122    }
123
124    @Override
125    public int[] getAcceptableTokens() {
126        return new int[] {
127            TokenTypes.ANNOTATION_DEF,
128            TokenTypes.ANNOTATION_FIELD_DEF,
129            TokenTypes.CLASS_DEF,
130            TokenTypes.CTOR_DEF,
131            TokenTypes.ENUM_CONSTANT_DEF,
132            TokenTypes.ENUM_DEF,
133            TokenTypes.INTERFACE_DEF,
134            TokenTypes.METHOD_DEF,
135            TokenTypes.PACKAGE_DEF,
136            TokenTypes.VARIABLE_DEF,
137        };
138    }
139
140    @Override
141    public int[] getRequiredTokens() {
142        return CommonUtils.EMPTY_INT_ARRAY;
143    }
144
145    @Override
146    public void visitToken(DetailAST ast) {
147        if (shouldCheck(ast)) {
148            final FileContents contents = getFileContents();
149            // Need to start searching for the comment before the annotations
150            // that may exist. Even if annotations are not defined on the
151            // package, the ANNOTATIONS AST is defined.
152            final TextBlock textBlock =
153                contents.getJavadocBefore(ast.getFirstChild().getLineNo());
154
155            checkComment(ast, textBlock);
156        }
157    }
158
159    /**
160     * Whether we should check this node.
161     * @param ast a given node.
162     * @return whether we should check a given node.
163     */
164    private boolean shouldCheck(final DetailAST ast) {
165        boolean check = false;
166
167        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
168            check = getFileContents().inPackageInfo();
169        }
170        else if (!ScopeUtils.isInCodeBlock(ast)) {
171            final Scope customScope;
172
173            if (ScopeUtils.isInInterfaceOrAnnotationBlock(ast)
174                    || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
175                customScope = Scope.PUBLIC;
176            }
177            else {
178                customScope = ScopeUtils.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
179            }
180            final Scope surroundingScope = ScopeUtils.getSurroundingScope(ast);
181
182            check = customScope.isIn(scope)
183                    && (surroundingScope == null || surroundingScope.isIn(scope))
184                    && (excludeScope == null
185                        || !customScope.isIn(excludeScope)
186                        || surroundingScope != null
187                            && !surroundingScope.isIn(excludeScope));
188        }
189        return check;
190    }
191
192    /**
193     * Performs the various checks against the Javadoc comment.
194     *
195     * @param ast the AST of the element being documented
196     * @param comment the source lines that make up the Javadoc comment.
197     *
198     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
199     * @see #checkHtmlTags(DetailAST, TextBlock)
200     */
201    private void checkComment(final DetailAST ast, final TextBlock comment) {
202        if (comment == null) {
203            // checking for missing docs in JavadocStyleCheck is not consistent
204            // with the rest of CheckStyle...  Even though, I didn't think it
205            // made sense to make another check just to ensure that the
206            // package-info.java file actually contains package Javadocs.
207            if (getFileContents().inPackageInfo()) {
208                log(ast.getLineNo(), MSG_JAVADOC_MISSING);
209            }
210        }
211        else {
212            if (checkFirstSentence) {
213                checkFirstSentenceEnding(ast, comment);
214            }
215
216            if (checkHtml) {
217                checkHtmlTags(ast, comment);
218            }
219
220            if (checkEmptyJavadoc) {
221                checkJavadocIsNotEmpty(comment);
222            }
223        }
224    }
225
226    /**
227     * Checks that the first sentence ends with proper punctuation.  This method
228     * uses a regular expression that checks for the presence of a period,
229     * question mark, or exclamation mark followed either by whitespace, an
230     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
231     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
232     *
233     * @param ast the current node
234     * @param comment the source lines that make up the Javadoc comment.
235     */
236    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
237        final String commentText = getCommentText(comment.getText());
238
239        if (!commentText.isEmpty()
240            && !endOfSentenceFormat.matcher(commentText).find()
241            && !(commentText.startsWith("{@inheritDoc}")
242            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
243            log(comment.getStartLineNo(), MSG_NO_PERIOD);
244        }
245    }
246
247    /**
248     * Checks that the Javadoc is not empty.
249     *
250     * @param comment the source lines that make up the Javadoc comment.
251     */
252    private void checkJavadocIsNotEmpty(TextBlock comment) {
253        final String commentText = getCommentText(comment.getText());
254
255        if (commentText.isEmpty()) {
256            log(comment.getStartLineNo(), MSG_EMPTY);
257        }
258    }
259
260    /**
261     * Returns the comment text from the Javadoc.
262     * @param comments the lines of Javadoc.
263     * @return a comment text String.
264     */
265    private static String getCommentText(String... comments) {
266        final StringBuilder builder = new StringBuilder(1024);
267        for (final String line : comments) {
268            final int textStart = findTextStart(line);
269
270            if (textStart != -1) {
271                if (line.charAt(textStart) == '@') {
272                    //we have found the tag section
273                    break;
274                }
275                builder.append(line.substring(textStart));
276                trimTail(builder);
277                builder.append('\n');
278            }
279        }
280
281        return builder.toString().trim();
282    }
283
284    /**
285     * Finds the index of the first non-whitespace character ignoring the
286     * Javadoc comment start and end strings (&#47** and *&#47) as well as any
287     * leading asterisk.
288     * @param line the Javadoc comment line of text to scan.
289     * @return the int index relative to 0 for the start of text
290     *         or -1 if not found.
291     */
292    private static int findTextStart(String line) {
293        int textStart = -1;
294        int index = 0;
295        while (index < line.length()) {
296            if (!Character.isWhitespace(line.charAt(index))) {
297                if (line.regionMatches(index, "/**", 0, "/**".length())) {
298                    index += 2;
299                }
300                else if (line.regionMatches(index, "*/", 0, 2)) {
301                    index++;
302                }
303                else if (line.charAt(index) != '*') {
304                    textStart = index;
305                    break;
306                }
307            }
308            index++;
309        }
310        return textStart;
311    }
312
313    /**
314     * Trims any trailing whitespace or the end of Javadoc comment string.
315     * @param builder the StringBuilder to trim.
316     */
317    private static void trimTail(StringBuilder builder) {
318        int index = builder.length() - 1;
319        while (true) {
320            if (Character.isWhitespace(builder.charAt(index))) {
321                builder.deleteCharAt(index);
322            }
323            else if (index > 0 && builder.charAt(index) == '/'
324                    && builder.charAt(index - 1) == '*') {
325                builder.deleteCharAt(index);
326                builder.deleteCharAt(index - 1);
327                index--;
328                while (builder.charAt(index - 1) == '*') {
329                    builder.deleteCharAt(index - 1);
330                    index--;
331                }
332            }
333            else {
334                break;
335            }
336            index--;
337        }
338    }
339
340    /**
341     * Checks the comment for HTML tags that do not have a corresponding close
342     * tag or a close tag that has no previous open tag.  This code was
343     * primarily copied from the DocCheck checkHtml method.
344     *
345     * @param ast the node with the Javadoc
346     * @param comment the {@code TextBlock} which represents
347     *                 the Javadoc comment.
348     * @noinspection MethodWithMultipleReturnPoints
349     */
350    // -@cs[ReturnCount] Too complex to break apart.
351    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
352        final int lineNo = comment.getStartLineNo();
353        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
354        final String[] text = comment.getText();
355
356        final TagParser parser = new TagParser(text, lineNo);
357
358        while (parser.hasNextTag()) {
359            final HtmlTag tag = parser.nextTag();
360
361            if (tag.isIncompleteTag()) {
362                log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
363                    text[tag.getLineNo() - lineNo]);
364                return;
365            }
366            if (tag.isClosedTag()) {
367                //do nothing
368                continue;
369            }
370            if (tag.isCloseTag()) {
371                // We have found a close tag.
372                if (isExtraHtml(tag.getId(), htmlStack)) {
373                    // No corresponding open tag was found on the stack.
374                    log(tag.getLineNo(),
375                        tag.getPosition(),
376                        MSG_EXTRA_HTML,
377                        tag.getText());
378                }
379                else {
380                    // See if there are any unclosed tags that were opened
381                    // after this one.
382                    checkUnclosedTags(htmlStack, tag.getId());
383                }
384            }
385            else {
386                //We only push html tags that are allowed
387                if (isAllowedTag(tag)) {
388                    htmlStack.push(tag);
389                }
390            }
391        }
392
393        // Identify any tags left on the stack.
394        // Skip multiples, like <b>...<b>
395        String lastFound = "";
396        final List<String> typeParameters = CheckUtils.getTypeParameterNames(ast);
397        for (final HtmlTag htmlTag : htmlStack) {
398            if (!isSingleTag(htmlTag)
399                && !htmlTag.getId().equals(lastFound)
400                && !typeParameters.contains(htmlTag.getId())) {
401                log(htmlTag.getLineNo(), htmlTag.getPosition(),
402                        MSG_UNCLOSED_HTML, htmlTag.getText());
403                lastFound = htmlTag.getId();
404            }
405        }
406    }
407
408    /**
409     * Checks to see if there are any unclosed tags on the stack.  The token
410     * represents a html tag that has been closed and has a corresponding open
411     * tag on the stack.  Any tags, except single tags, that were opened
412     * (pushed on the stack) after the token are missing a close.
413     *
414     * @param htmlStack the stack of opened HTML tags.
415     * @param token the current HTML tag name that has been closed.
416     */
417    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
418        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
419        HtmlTag lastOpenTag = htmlStack.pop();
420        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
421            // Find unclosed elements. Put them on a stack so the
422            // output order won't be back-to-front.
423            if (isSingleTag(lastOpenTag)) {
424                lastOpenTag = htmlStack.pop();
425            }
426            else {
427                unclosedTags.push(lastOpenTag);
428                lastOpenTag = htmlStack.pop();
429            }
430        }
431
432        // Output the unterminated tags, if any
433        // Skip multiples, like <b>..<b>
434        String lastFound = "";
435        for (final HtmlTag htag : unclosedTags) {
436            lastOpenTag = htag;
437            if (lastOpenTag.getId().equals(lastFound)) {
438                continue;
439            }
440            lastFound = lastOpenTag.getId();
441            log(lastOpenTag.getLineNo(),
442                lastOpenTag.getPosition(),
443                MSG_UNCLOSED_HTML,
444                lastOpenTag.getText());
445        }
446    }
447
448    /**
449     * Determines if the HtmlTag is one which does not require a close tag.
450     *
451     * @param tag the HtmlTag to check.
452     * @return {@code true} if the HtmlTag is a single tag.
453     */
454    private static boolean isSingleTag(HtmlTag tag) {
455        // If its a singleton tag (<p>, <br>, etc.), ignore it
456        // Can't simply not put them on the stack, since singletons
457        // like <dt> and <dd> (unhappily) may either be terminated
458        // or not terminated. Both options are legal.
459        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
460    }
461
462    /**
463     * Determines if the HtmlTag is one which is allowed in a javadoc.
464     *
465     * @param tag the HtmlTag to check.
466     * @return {@code true} if the HtmlTag is an allowed html tag.
467     */
468    private static boolean isAllowedTag(HtmlTag tag) {
469        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
470    }
471
472    /**
473     * Determines if the given token is an extra HTML tag. This indicates that
474     * a close tag was found that does not have a corresponding open tag.
475     *
476     * @param token an HTML tag id for which a close was found.
477     * @param htmlStack a Stack of previous open HTML tags.
478     * @return {@code false} if a previous open tag was found
479     *         for the token.
480     */
481    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
482        boolean isExtra = true;
483        for (final HtmlTag tag : htmlStack) {
484            // Loop, looking for tags that are closed.
485            // The loop is needed in case there are unclosed
486            // tags on the stack. In that case, the stack would
487            // not be empty, but this tag would still be extra.
488            if (token.equalsIgnoreCase(tag.getId())) {
489                isExtra = false;
490                break;
491            }
492        }
493
494        return isExtra;
495    }
496
497    /**
498     * Sets the scope to check.
499     * @param scope a scope.
500     */
501    public void setScope(Scope scope) {
502        this.scope = scope;
503    }
504
505    /**
506     * Set the excludeScope.
507     * @param excludeScope a scope.
508     */
509    public void setExcludeScope(Scope excludeScope) {
510        this.excludeScope = excludeScope;
511    }
512
513    /**
514     * Set the format for matching the end of a sentence.
515     * @param pattern a pattern.
516     */
517    public void setEndOfSentenceFormat(Pattern pattern) {
518        endOfSentenceFormat = pattern;
519    }
520
521    /**
522     * Sets the flag that determines if the first sentence is checked for
523     * proper end of sentence punctuation.
524     * @param flag {@code true} if the first sentence is to be checked
525     */
526    public void setCheckFirstSentence(boolean flag) {
527        checkFirstSentence = flag;
528    }
529
530    /**
531     * Sets the flag that determines if HTML checking is to be performed.
532     * @param flag {@code true} if HTML checking is to be performed.
533     */
534    public void setCheckHtml(boolean flag) {
535        checkHtml = flag;
536    }
537
538    /**
539     * Sets the flag that determines if empty Javadoc checking should be done.
540     * @param flag {@code true} if empty Javadoc checking should be done.
541     */
542    public void setCheckEmptyJavadoc(boolean flag) {
543        checkEmptyJavadoc = flag;
544    }
545
546}