checks/javadoc/JavadocStyleCheck.java

////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code for adherence to a set of rules.
// Copyright (C) 2001-2019 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.javadoc;

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Collections;
import java.util.Deque;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.puppycrawl.tools.checkstyle.JavadocDetailNodeParser;
import com.puppycrawl.tools.checkstyle.StatelessCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.Scope;
import com.puppycrawl.tools.checkstyle.api.TextBlock;
import com.puppycrawl.tools.checkstyle.api.TokenTypes;
import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
import com.puppycrawl.tools.checkstyle.utils.ScopeUtil;

/**
 * Custom Checkstyle Check to validate Javadoc.
 *
 */
@StatelessCheck
public class JavadocStyleCheck
    extends AbstractCheck {

    /** Message property key for the Unclosed HTML message. */
    public static final String MSG_JAVADOC_MISSING = "javadoc.missing";

    /** Message property key for the Unclosed HTML message. */
    public static final String MSG_EMPTY = "javadoc.empty";

    /** Message property key for the Unclosed HTML message. */
    public static final String MSG_NO_PERIOD = "javadoc.noPeriod";

    /** Message property key for the Unclosed HTML message. */
    public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";

    /** Message property key for the Unclosed HTML message. */
    public static final String MSG_UNCLOSED_HTML = JavadocDetailNodeParser.MSG_UNCLOSED_HTML_TAG;

    /** Message property key for the Extra HTML message. */
    public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";

    /** HTML tags that do not require a close tag. */
    private static final Set<String> SINGLE_TAGS = Collections.unmodifiableSortedSet(
        Arrays.stream(new String[] {"br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th", })
            .collect(Collectors.toCollection(TreeSet::new)));

    /** HTML tags that are allowed in java docs.
     * From https://www.w3schools.com/tags/default.asp
     * The forms and structure tags are not allowed
     */
    private static final Set<String> ALLOWED_TAGS = Collections.unmodifiableSortedSet(
        Arrays.stream(new String[] {
            "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
            "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
            "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
            "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
            "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
            "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
            "thead", "tr", "tt", "u", "ul", "var", })
        .collect(Collectors.toCollection(TreeSet::new)));

    /** The scope to check. */
    private Scope scope = Scope.PRIVATE;

    /** The visibility scope where Javadoc comments shouldn't be checked. **/
    private Scope excludeScope;

    /** Format for matching the end of a sentence. */
    private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");

    /**
     * Indicates if the first sentence should be checked for proper end of
     * sentence punctuation.
     */
    private boolean checkFirstSentence = true;

    /**
     * Indicates if the HTML within the comment should be checked.
     */
    private boolean checkHtml = true;

    /**
     * Indicates if empty javadoc statements should be checked.
     */
    private boolean checkEmptyJavadoc;

    @Override
    public int[] getDefaultTokens() {
        return getAcceptableTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return new int[] {
            TokenTypes.ANNOTATION_DEF,
            TokenTypes.ANNOTATION_FIELD_DEF,
            TokenTypes.CLASS_DEF,
            TokenTypes.CTOR_DEF,
            TokenTypes.ENUM_CONSTANT_DEF,
            TokenTypes.ENUM_DEF,
            TokenTypes.INTERFACE_DEF,
            TokenTypes.METHOD_DEF,
            TokenTypes.PACKAGE_DEF,
            TokenTypes.VARIABLE_DEF,
        };
    }

    @Override
    public int[] getRequiredTokens() {
        return CommonUtil.EMPTY_INT_ARRAY;
    }

    @Override
    public void visitToken(DetailAST ast) {
        if (shouldCheck(ast)) {
            final FileContents contents = getFileContents();
            // Need to start searching for the comment before the annotations
            // that may exist. Even if annotations are not defined on the
            // package, the ANNOTATIONS AST is defined.
            final TextBlock textBlock =
                contents.getJavadocBefore(ast.getFirstChild().getLineNo());

            checkComment(ast, textBlock);
        }
    }

    /**
     * Whether we should check this node.
     * @param ast a given node.
     * @return whether we should check a given node.
     */
    private boolean shouldCheck(final DetailAST ast) {
        boolean check = false;

        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
            check = getFileContents().inPackageInfo();
        }
        else if (!ScopeUtil.isInCodeBlock(ast)) {
            final Scope customScope;

            if (ScopeUtil.isInInterfaceOrAnnotationBlock(ast)
                    || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
                customScope = Scope.PUBLIC;
            }
            else {
                customScope = ScopeUtil.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
            }
            final Scope surroundingScope = ScopeUtil.getSurroundingScope(ast);

            check = customScope.isIn(scope)
                    && (surroundingScope == null || surroundingScope.isIn(scope))
                    && (excludeScope == null
                        || !customScope.isIn(excludeScope)
                        || surroundingScope != null
                            && !surroundingScope.isIn(excludeScope));
        }
        return check;
    }

    /**
     * Performs the various checks against the Javadoc comment.
     *
     * @param ast the AST of the element being documented
     * @param comment the source lines that make up the Javadoc comment.
     *
     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
     * @see #checkHtmlTags(DetailAST, TextBlock)
     */
    private void checkComment(final DetailAST ast, final TextBlock comment) {
        if (comment == null) {
            // checking for missing docs in JavadocStyleCheck is not consistent
            // with the rest of CheckStyle...  Even though, I didn't think it
            // made sense to make another check just to ensure that the
            // package-info.java file actually contains package Javadocs.
            if (getFileContents().inPackageInfo()) {
                log(ast.getLineNo(), MSG_JAVADOC_MISSING);
            }
        }
        else {
            if (checkFirstSentence) {
                checkFirstSentenceEnding(ast, comment);
            }

            if (checkHtml) {
                checkHtmlTags(ast, comment);
            }

            if (checkEmptyJavadoc) {
                checkJavadocIsNotEmpty(comment);
            }
        }
    }

    /**
     * Checks that the first sentence ends with proper punctuation.  This method
     * uses a regular expression that checks for the presence of a period,
     * question mark, or exclamation mark followed either by whitespace, an
     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
     *
     * @param ast the current node
     * @param comment the source lines that make up the Javadoc comment.
     */
    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
        final String commentText = getCommentText(comment.getText());

        if (!commentText.isEmpty()
            && !endOfSentenceFormat.matcher(commentText).find()
            && !(commentText.startsWith("{@inheritDoc}")
            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
            log(comment.getStartLineNo(), MSG_NO_PERIOD);
        }
    }

    /**
     * Checks that the Javadoc is not empty.
     *
     * @param comment the source lines that make up the Javadoc comment.
     */
    private void checkJavadocIsNotEmpty(TextBlock comment) {
        final String commentText = getCommentText(comment.getText());

        if (commentText.isEmpty()) {
            log(comment.getStartLineNo(), MSG_EMPTY);
        }
    }

    /**
     * Returns the comment text from the Javadoc.
     * @param comments the lines of Javadoc.
     * @return a comment text String.
     */
    private static String getCommentText(String... comments) {
        final StringBuilder builder = new StringBuilder(1024);
        for (final String line : comments) {
            final int textStart = findTextStart(line);

            if (textStart != -1) {
                if (line.charAt(textStart) == '@') {
                    //we have found the tag section
                    break;
                }
                builder.append(line.substring(textStart));
                trimTail(builder);
                builder.append('\n');
            }
        }

        return builder.toString().trim();
    }

    /**
     * Finds the index of the first non-whitespace character ignoring the
     * Javadoc comment start and end strings (&#47** and *&#47) as well as any
     * leading asterisk.
     * @param line the Javadoc comment line of text to scan.
     * @return the int index relative to 0 for the start of text
     *         or -1 if not found.
     */
    private static int findTextStart(String line) {
        int textStart = -1;
        int index = 0;
        while (index < line.length()) {
            if (!Character.isWhitespace(line.charAt(index))) {
                if (line.regionMatches(index, "/**", 0, "/**".length())) {
                    index += 2;
                }
                else if (line.regionMatches(index, "*/", 0, 2)) {
                    index++;
                }
                else if (line.charAt(index) != '*') {
                    textStart = index;
                    break;
                }
            }
            index++;
        }
        return textStart;
    }

    /**
     * Trims any trailing whitespace or the end of Javadoc comment string.
     * @param builder the StringBuilder to trim.
     */
    private static void trimTail(StringBuilder builder) {
        int index = builder.length() - 1;
        while (true) {
            if (Character.isWhitespace(builder.charAt(index))) {
                builder.deleteCharAt(index);
            }
            else if (index > 0 && builder.charAt(index) == '/'
                    && builder.charAt(index - 1) == '*') {
                builder.deleteCharAt(index);
                builder.deleteCharAt(index - 1);
                index--;
                while (builder.charAt(index - 1) == '*') {
                    builder.deleteCharAt(index - 1);
                    index--;
                }
            }
            else {
                break;
            }
            index--;
        }
    }

    /**
     * Checks the comment for HTML tags that do not have a corresponding close
     * tag or a close tag that has no previous open tag.  This code was
     * primarily copied from the DocCheck checkHtml method.
     *
     * @param ast the node with the Javadoc
     * @param comment the {@code TextBlock} which represents
     *                 the Javadoc comment.
     * @noinspection MethodWithMultipleReturnPoints
     */
    // -@cs[ReturnCount] Too complex to break apart.
    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
        final int lineNo = comment.getStartLineNo();
        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
        final String[] text = comment.getText();

        final TagParser parser = new TagParser(text, lineNo);

        while (parser.hasNextTag()) {
            final HtmlTag tag = parser.nextTag();

            if (tag.isIncompleteTag()) {
                log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
                    text[tag.getLineNo() - lineNo]);
                return;
            }
            if (tag.isClosedTag()) {
                //do nothing
                continue;
            }
            if (tag.isCloseTag()) {
                // We have found a close tag.
                if (isExtraHtml(tag.getId(), htmlStack)) {
                    // No corresponding open tag was found on the stack.
                    log(tag.getLineNo(),
                        tag.getPosition(),
                        MSG_EXTRA_HTML,
                        tag.getText());
                }
                else {
                    // See if there are any unclosed tags that were opened
                    // after this one.
                    checkUnclosedTags(htmlStack, tag.getId());
                }
            }
            else {
                //We only push html tags that are allowed
                if (isAllowedTag(tag)) {
                    htmlStack.push(tag);
                }
            }
        }

        // Identify any tags left on the stack.
        // Skip multiples, like <b>...<b>
        String lastFound = "";
        final List<String> typeParameters = CheckUtil.getTypeParameterNames(ast);
        for (final HtmlTag htmlTag : htmlStack) {
            if (!isSingleTag(htmlTag)
                && !htmlTag.getId().equals(lastFound)
                && !typeParameters.contains(htmlTag.getId())) {
                log(htmlTag.getLineNo(), htmlTag.getPosition(),
                        MSG_UNCLOSED_HTML, htmlTag.getText());
                lastFound = htmlTag.getId();
            }
        }
    }

    /**
     * Checks to see if there are any unclosed tags on the stack.  The token
     * represents a html tag that has been closed and has a corresponding open
     * tag on the stack.  Any tags, except single tags, that were opened
     * (pushed on the stack) after the token are missing a close.
     *
     * @param htmlStack the stack of opened HTML tags.
     * @param token the current HTML tag name that has been closed.
     */
    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
        HtmlTag lastOpenTag = htmlStack.pop();
        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
            // Find unclosed elements. Put them on a stack so the
            // output order won't be back-to-front.
            if (isSingleTag(lastOpenTag)) {
                lastOpenTag = htmlStack.pop();
            }
            else {
                unclosedTags.push(lastOpenTag);
                lastOpenTag = htmlStack.pop();
            }
        }

        // Output the unterminated tags, if any
        // Skip multiples, like <b>..<b>
        String lastFound = "";
        for (final HtmlTag htag : unclosedTags) {
            lastOpenTag = htag;
            if (lastOpenTag.getId().equals(lastFound)) {
                continue;
            }
            lastFound = lastOpenTag.getId();
            log(lastOpenTag.getLineNo(),
                lastOpenTag.getPosition(),
                MSG_UNCLOSED_HTML,
                lastOpenTag.getText());
        }
    }

    /**
     * Determines if the HtmlTag is one which does not require a close tag.
     *
     * @param tag the HtmlTag to check.
     * @return {@code true} if the HtmlTag is a single tag.
     */
    private static boolean isSingleTag(HtmlTag tag) {
        // If its a singleton tag (<p>, <br>, etc.), ignore it
        // Can't simply not put them on the stack, since singletons
        // like <dt> and <dd> (unhappily) may either be terminated
        // or not terminated. Both options are legal.
        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
    }

    /**
     * Determines if the HtmlTag is one which is allowed in a javadoc.
     *
     * @param tag the HtmlTag to check.
     * @return {@code true} if the HtmlTag is an allowed html tag.
     */
    private static boolean isAllowedTag(HtmlTag tag) {
        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
    }

    /**
     * Determines if the given token is an extra HTML tag. This indicates that
     * a close tag was found that does not have a corresponding open tag.
     *
     * @param token an HTML tag id for which a close was found.
     * @param htmlStack a Stack of previous open HTML tags.
     * @return {@code false} if a previous open tag was found
     *         for the token.
     */
    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
        boolean isExtra = true;
        for (final HtmlTag tag : htmlStack) {
            // Loop, looking for tags that are closed.
            // The loop is needed in case there are unclosed
            // tags on the stack. In that case, the stack would
            // not be empty, but this tag would still be extra.
            if (token.equalsIgnoreCase(tag.getId())) {
                isExtra = false;
                break;
            }
        }

        return isExtra;
    }

    /**
     * Sets the scope to check.
     * @param scope a scope.
     */
    public void setScope(Scope scope) {
        this.scope = scope;
    }

    /**
     * Set the excludeScope.
     * @param excludeScope a scope.
     */
    public void setExcludeScope(Scope excludeScope) {
        this.excludeScope = excludeScope;
    }

    /**
     * Set the format for matching the end of a sentence.
     * @param pattern a pattern.
     */
    public void setEndOfSentenceFormat(Pattern pattern) {
        endOfSentenceFormat = pattern;
    }

    /**
     * Sets the flag that determines if the first sentence is checked for
     * proper end of sentence punctuation.
     * @param flag {@code true} if the first sentence is to be checked
     */
    public void setCheckFirstSentence(boolean flag) {
        checkFirstSentence = flag;
    }

    /**
     * Sets the flag that determines if HTML checking is to be performed.
     * @param flag {@code true} if HTML checking is to be performed.
     */
    public void setCheckHtml(boolean flag) {
        checkHtml = flag;
    }

    /**
     * Sets the flag that determines if empty Javadoc checking should be done.
     * @param flag {@code true} if empty Javadoc checking should be done.
     */
    public void setCheckEmptyJavadoc(boolean flag) {
        checkEmptyJavadoc = flag;
    }

}