1   ////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code for adherence to a set of rules.
3   // Copyright (C) 2001-2019 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.javadoc;
21  
22  import java.util.ArrayDeque;
23  import java.util.Arrays;
24  import java.util.Collections;
25  import java.util.Deque;
26  import java.util.List;
27  import java.util.Locale;
28  import java.util.Set;
29  import java.util.TreeSet;
30  import java.util.regex.Pattern;
31  import java.util.stream.Collectors;
32  
33  import com.puppycrawl.tools.checkstyle.JavadocDetailNodeParser;
34  import com.puppycrawl.tools.checkstyle.StatelessCheck;
35  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
36  import com.puppycrawl.tools.checkstyle.api.DetailAST;
37  import com.puppycrawl.tools.checkstyle.api.FileContents;
38  import com.puppycrawl.tools.checkstyle.api.Scope;
39  import com.puppycrawl.tools.checkstyle.api.TextBlock;
40  import com.puppycrawl.tools.checkstyle.api.TokenTypes;
41  import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
42  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
43  import com.puppycrawl.tools.checkstyle.utils.ScopeUtil;
44  
45  /**
46   * Custom Checkstyle Check to validate Javadoc.
47   *
48   */
49  @StatelessCheck
50  public class JavadocStyleCheck
51      extends AbstractCheck {
52  
53      /** Message property key for the Unclosed HTML message. */
54      public static final String MSG_JAVADOC_MISSING = "javadoc.missing";
55  
56      /** Message property key for the Unclosed HTML message. */
57      public static final String MSG_EMPTY = "javadoc.empty";
58  
59      /** Message property key for the Unclosed HTML message. */
60      public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
61  
62      /** Message property key for the Unclosed HTML message. */
63      public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
64  
65      /** Message property key for the Unclosed HTML message. */
66      public static final String MSG_UNCLOSED_HTML = JavadocDetailNodeParser.MSG_UNCLOSED_HTML_TAG;
67  
68      /** Message property key for the Extra HTML message. */
69      public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
70  
71      /** HTML tags that do not require a close tag. */
72      private static final Set<String> SINGLE_TAGS = Collections.unmodifiableSortedSet(
73          Arrays.stream(new String[] {"br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th", })
74              .collect(Collectors.toCollection(TreeSet::new)));
75  
76      /** HTML tags that are allowed in java docs.
77       * From https://www.w3schools.com/tags/default.asp
78       * The forms and structure tags are not allowed
79       */
80      private static final Set<String> ALLOWED_TAGS = Collections.unmodifiableSortedSet(
81          Arrays.stream(new String[] {
82              "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
83              "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
84              "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
85              "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
86              "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
87              "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
88              "thead", "tr", "tt", "u", "ul", "var", })
89          .collect(Collectors.toCollection(TreeSet::new)));
90  
91      /** The scope to check. */
92      private Scope scope = Scope.PRIVATE;
93  
94      /** The visibility scope where Javadoc comments shouldn't be checked. **/
95      private Scope excludeScope;
96  
97      /** Format for matching the end of a sentence. */
98      private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");
99  
100     /**
101      * Indicates if the first sentence should be checked for proper end of
102      * sentence punctuation.
103      */
104     private boolean checkFirstSentence = true;
105 
106     /**
107      * Indicates if the HTML within the comment should be checked.
108      */
109     private boolean checkHtml = true;
110 
111     /**
112      * Indicates if empty javadoc statements should be checked.
113      */
114     private boolean checkEmptyJavadoc;
115 
116     @Override
117     public int[] getDefaultTokens() {
118         return getAcceptableTokens();
119     }
120 
121     @Override
122     public int[] getAcceptableTokens() {
123         return new int[] {
124             TokenTypes.ANNOTATION_DEF,
125             TokenTypes.ANNOTATION_FIELD_DEF,
126             TokenTypes.CLASS_DEF,
127             TokenTypes.CTOR_DEF,
128             TokenTypes.ENUM_CONSTANT_DEF,
129             TokenTypes.ENUM_DEF,
130             TokenTypes.INTERFACE_DEF,
131             TokenTypes.METHOD_DEF,
132             TokenTypes.PACKAGE_DEF,
133             TokenTypes.VARIABLE_DEF,
134         };
135     }
136 
137     @Override
138     public int[] getRequiredTokens() {
139         return CommonUtil.EMPTY_INT_ARRAY;
140     }
141 
142     @Override
143     public void visitToken(DetailAST ast) {
144         if (shouldCheck(ast)) {
145             final FileContents contents = getFileContents();
146             // Need to start searching for the comment before the annotations
147             // that may exist. Even if annotations are not defined on the
148             // package, the ANNOTATIONS AST is defined.
149             final TextBlock textBlock =
150                 contents.getJavadocBefore(ast.getFirstChild().getLineNo());
151 
152             checkComment(ast, textBlock);
153         }
154     }
155 
156     /**
157      * Whether we should check this node.
158      * @param ast a given node.
159      * @return whether we should check a given node.
160      */
161     private boolean shouldCheck(final DetailAST ast) {
162         boolean check = false;
163 
164         if (ast.getType() == TokenTypes.PACKAGE_DEF) {
165             check = getFileContents().inPackageInfo();
166         }
167         else if (!ScopeUtil.isInCodeBlock(ast)) {
168             final Scope customScope;
169 
170             if (ScopeUtil.isInInterfaceOrAnnotationBlock(ast)
171                     || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
172                 customScope = Scope.PUBLIC;
173             }
174             else {
175                 customScope = ScopeUtil.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
176             }
177             final Scope surroundingScope = ScopeUtil.getSurroundingScope(ast);
178 
179             check = customScope.isIn(scope)
180                     && (surroundingScope == null || surroundingScope.isIn(scope))
181                     && (excludeScope == null
182                         || !customScope.isIn(excludeScope)
183                         || surroundingScope != null
184                             && !surroundingScope.isIn(excludeScope));
185         }
186         return check;
187     }
188 
189     /**
190      * Performs the various checks against the Javadoc comment.
191      *
192      * @param ast the AST of the element being documented
193      * @param comment the source lines that make up the Javadoc comment.
194      *
195      * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
196      * @see #checkHtmlTags(DetailAST, TextBlock)
197      */
198     private void checkComment(final DetailAST ast, final TextBlock comment) {
199         if (comment == null) {
200             // checking for missing docs in JavadocStyleCheck is not consistent
201             // with the rest of CheckStyle...  Even though, I didn't think it
202             // made sense to make another check just to ensure that the
203             // package-info.java file actually contains package Javadocs.
204             if (getFileContents().inPackageInfo()) {
205                 log(ast.getLineNo(), MSG_JAVADOC_MISSING);
206             }
207         }
208         else {
209             if (checkFirstSentence) {
210                 checkFirstSentenceEnding(ast, comment);
211             }
212 
213             if (checkHtml) {
214                 checkHtmlTags(ast, comment);
215             }
216 
217             if (checkEmptyJavadoc) {
218                 checkJavadocIsNotEmpty(comment);
219             }
220         }
221     }
222 
223     /**
224      * Checks that the first sentence ends with proper punctuation.  This method
225      * uses a regular expression that checks for the presence of a period,
226      * question mark, or exclamation mark followed either by whitespace, an
227      * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
228      * comments for TokenTypes that are valid for {_AT_inheritDoc}.
229      *
230      * @param ast the current node
231      * @param comment the source lines that make up the Javadoc comment.
232      */
233     private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
234         final String commentText = getCommentText(comment.getText());
235 
236         if (!commentText.isEmpty()
237             && !endOfSentenceFormat.matcher(commentText).find()
238             && !(commentText.startsWith("{@inheritDoc}")
239             && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
240             log(comment.getStartLineNo(), MSG_NO_PERIOD);
241         }
242     }
243 
244     /**
245      * Checks that the Javadoc is not empty.
246      *
247      * @param comment the source lines that make up the Javadoc comment.
248      */
249     private void checkJavadocIsNotEmpty(TextBlock comment) {
250         final String commentText = getCommentText(comment.getText());
251 
252         if (commentText.isEmpty()) {
253             log(comment.getStartLineNo(), MSG_EMPTY);
254         }
255     }
256 
257     /**
258      * Returns the comment text from the Javadoc.
259      * @param comments the lines of Javadoc.
260      * @return a comment text String.
261      */
262     private static String getCommentText(String... comments) {
263         final StringBuilder builder = new StringBuilder(1024);
264         for (final String line : comments) {
265             final int textStart = findTextStart(line);
266 
267             if (textStart != -1) {
268                 if (line.charAt(textStart) == '@') {
269                     //we have found the tag section
270                     break;
271                 }
272                 builder.append(line.substring(textStart));
273                 trimTail(builder);
274                 builder.append('\n');
275             }
276         }
277 
278         return builder.toString().trim();
279     }
280 
281     /**
282      * Finds the index of the first non-whitespace character ignoring the
283      * Javadoc comment start and end strings (&#47** and *&#47) as well as any
284      * leading asterisk.
285      * @param line the Javadoc comment line of text to scan.
286      * @return the int index relative to 0 for the start of text
287      *         or -1 if not found.
288      */
289     private static int findTextStart(String line) {
290         int textStart = -1;
291         int index = 0;
292         while (index < line.length()) {
293             if (!Character.isWhitespace(line.charAt(index))) {
294                 if (line.regionMatches(index, "/**", 0, "/**".length())) {
295                     index += 2;
296                 }
297                 else if (line.regionMatches(index, "*/", 0, 2)) {
298                     index++;
299                 }
300                 else if (line.charAt(index) != '*') {
301                     textStart = index;
302                     break;
303                 }
304             }
305             index++;
306         }
307         return textStart;
308     }
309 
310     /**
311      * Trims any trailing whitespace or the end of Javadoc comment string.
312      * @param builder the StringBuilder to trim.
313      */
314     private static void trimTail(StringBuilder builder) {
315         int index = builder.length() - 1;
316         while (true) {
317             if (Character.isWhitespace(builder.charAt(index))) {
318                 builder.deleteCharAt(index);
319             }
320             else if (index > 0 && builder.charAt(index) == '/'
321                     && builder.charAt(index - 1) == '*') {
322                 builder.deleteCharAt(index);
323                 builder.deleteCharAt(index - 1);
324                 index--;
325                 while (builder.charAt(index - 1) == '*') {
326                     builder.deleteCharAt(index - 1);
327                     index--;
328                 }
329             }
330             else {
331                 break;
332             }
333             index--;
334         }
335     }
336 
337     /**
338      * Checks the comment for HTML tags that do not have a corresponding close
339      * tag or a close tag that has no previous open tag.  This code was
340      * primarily copied from the DocCheck checkHtml method.
341      *
342      * @param ast the node with the Javadoc
343      * @param comment the {@code TextBlock} which represents
344      *                 the Javadoc comment.
345      * @noinspection MethodWithMultipleReturnPoints
346      */
347     // -@cs[ReturnCount] Too complex to break apart.
348     private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
349         final int lineNo = comment.getStartLineNo();
350         final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
351         final String[] text = comment.getText();
352 
353         final TagParser parser = new TagParser(text, lineNo);
354 
355         while (parser.hasNextTag()) {
356             final HtmlTag tag = parser.nextTag();
357 
358             if (tag.isIncompleteTag()) {
359                 log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
360                     text[tag.getLineNo() - lineNo]);
361                 return;
362             }
363             if (tag.isClosedTag()) {
364                 //do nothing
365                 continue;
366             }
367             if (tag.isCloseTag()) {
368                 // We have found a close tag.
369                 if (isExtraHtml(tag.getId(), htmlStack)) {
370                     // No corresponding open tag was found on the stack.
371                     log(tag.getLineNo(),
372                         tag.getPosition(),
373                         MSG_EXTRA_HTML,
374                         tag.getText());
375                 }
376                 else {
377                     // See if there are any unclosed tags that were opened
378                     // after this one.
379                     checkUnclosedTags(htmlStack, tag.getId());
380                 }
381             }
382             else {
383                 //We only push html tags that are allowed
384                 if (isAllowedTag(tag)) {
385                     htmlStack.push(tag);
386                 }
387             }
388         }
389 
390         // Identify any tags left on the stack.
391         // Skip multiples, like <b>...<b>
392         String lastFound = "";
393         final List<String> typeParameters = CheckUtil.getTypeParameterNames(ast);
394         for (final HtmlTag htmlTag : htmlStack) {
395             if (!isSingleTag(htmlTag)
396                 && !htmlTag.getId().equals(lastFound)
397                 && !typeParameters.contains(htmlTag.getId())) {
398                 log(htmlTag.getLineNo(), htmlTag.getPosition(),
399                         MSG_UNCLOSED_HTML, htmlTag.getText());
400                 lastFound = htmlTag.getId();
401             }
402         }
403     }
404 
405     /**
406      * Checks to see if there are any unclosed tags on the stack.  The token
407      * represents a html tag that has been closed and has a corresponding open
408      * tag on the stack.  Any tags, except single tags, that were opened
409      * (pushed on the stack) after the token are missing a close.
410      *
411      * @param htmlStack the stack of opened HTML tags.
412      * @param token the current HTML tag name that has been closed.
413      */
414     private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
415         final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
416         HtmlTag lastOpenTag = htmlStack.pop();
417         while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
418             // Find unclosed elements. Put them on a stack so the
419             // output order won't be back-to-front.
420             if (isSingleTag(lastOpenTag)) {
421                 lastOpenTag = htmlStack.pop();
422             }
423             else {
424                 unclosedTags.push(lastOpenTag);
425                 lastOpenTag = htmlStack.pop();
426             }
427         }
428 
429         // Output the unterminated tags, if any
430         // Skip multiples, like <b>..<b>
431         String lastFound = "";
432         for (final HtmlTag htag : unclosedTags) {
433             lastOpenTag = htag;
434             if (lastOpenTag.getId().equals(lastFound)) {
435                 continue;
436             }
437             lastFound = lastOpenTag.getId();
438             log(lastOpenTag.getLineNo(),
439                 lastOpenTag.getPosition(),
440                 MSG_UNCLOSED_HTML,
441                 lastOpenTag.getText());
442         }
443     }
444 
445     /**
446      * Determines if the HtmlTag is one which does not require a close tag.
447      *
448      * @param tag the HtmlTag to check.
449      * @return {@code true} if the HtmlTag is a single tag.
450      */
451     private static boolean isSingleTag(HtmlTag tag) {
452         // If its a singleton tag (<p>, <br>, etc.), ignore it
453         // Can't simply not put them on the stack, since singletons
454         // like <dt> and <dd> (unhappily) may either be terminated
455         // or not terminated. Both options are legal.
456         return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
457     }
458 
459     /**
460      * Determines if the HtmlTag is one which is allowed in a javadoc.
461      *
462      * @param tag the HtmlTag to check.
463      * @return {@code true} if the HtmlTag is an allowed html tag.
464      */
465     private static boolean isAllowedTag(HtmlTag tag) {
466         return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
467     }
468 
469     /**
470      * Determines if the given token is an extra HTML tag. This indicates that
471      * a close tag was found that does not have a corresponding open tag.
472      *
473      * @param token an HTML tag id for which a close was found.
474      * @param htmlStack a Stack of previous open HTML tags.
475      * @return {@code false} if a previous open tag was found
476      *         for the token.
477      */
478     private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
479         boolean isExtra = true;
480         for (final HtmlTag tag : htmlStack) {
481             // Loop, looking for tags that are closed.
482             // The loop is needed in case there are unclosed
483             // tags on the stack. In that case, the stack would
484             // not be empty, but this tag would still be extra.
485             if (token.equalsIgnoreCase(tag.getId())) {
486                 isExtra = false;
487                 break;
488             }
489         }
490 
491         return isExtra;
492     }
493 
494     /**
495      * Sets the scope to check.
496      * @param scope a scope.
497      */
498     public void setScope(Scope scope) {
499         this.scope = scope;
500     }
501 
502     /**
503      * Set the excludeScope.
504      * @param excludeScope a scope.
505      */
506     public void setExcludeScope(Scope excludeScope) {
507         this.excludeScope = excludeScope;
508     }
509 
510     /**
511      * Set the format for matching the end of a sentence.
512      * @param pattern a pattern.
513      */
514     public void setEndOfSentenceFormat(Pattern pattern) {
515         endOfSentenceFormat = pattern;
516     }
517 
518     /**
519      * Sets the flag that determines if the first sentence is checked for
520      * proper end of sentence punctuation.
521      * @param flag {@code true} if the first sentence is to be checked
522      */
523     public void setCheckFirstSentence(boolean flag) {
524         checkFirstSentence = flag;
525     }
526 
527     /**
528      * Sets the flag that determines if HTML checking is to be performed.
529      * @param flag {@code true} if HTML checking is to be performed.
530      */
531     public void setCheckHtml(boolean flag) {
532         checkHtml = flag;
533     }
534 
535     /**
536      * Sets the flag that determines if empty Javadoc checking should be done.
537      * @param flag {@code true} if empty Javadoc checking should be done.
538      */
539     public void setCheckEmptyJavadoc(boolean flag) {
540         checkEmptyJavadoc = flag;
541     }
542 
543 }