1   ////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code for adherence to a set of rules.
3   // Copyright (C) 2001-2019 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.javadoc;
21  
22  import java.util.LinkedList;
23  import java.util.List;
24  
25  /**
26   * <p>
27   * Helper class used to parse HTML tags or generic type identifiers
28   * from a single line of text. Just the beginning of the HTML tag
29   * is located.  No attempt is made to parse out the complete tag,
30   * particularly since some of the tag parameters could be located
31   * on the following line of text.  The {@code hasNextTag} and
32   * {@code nextTag} methods are used to iterate through the HTML
33   * tags or generic type identifiers that were found on the line of text.
34   * </p>
35   *
36   * <p>
37   * This class isn't really specific to HTML tags. Currently the only HTML
38   * tag that this class looks specifically for is the HTML comment tag.
39   * This class helps figure out if a tag exists and if it is well-formed.
40   * It does not know whether it is valid HTML.  This class is also used for
41   * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>,
42   * <MY_FOO_TYPE>}, etc. According to this class they are valid tags.
43   * </p>
44   *
45   */
46  class TagParser {
47  
48      /** List of HtmlTags found on the input line of text. */
49      private final List<HtmlTag> tags = new LinkedList<>();
50  
51      /**
52       * Constructs a TagParser and finds the first tag if any.
53       * @param text the line of text to parse.
54       * @param lineNo the source line number.
55       */
56      /* package */ TagParser(String[] text, int lineNo) {
57          parseTags(text, lineNo);
58      }
59  
60      /**
61       * Returns the next available HtmlTag.
62       * @return a HtmlTag or {@code null} if none available.
63       * @throws IndexOutOfBoundsException if there are no HtmlTags
64       *         left to return.
65       */
66      public HtmlTag nextTag() {
67          return tags.remove(0);
68      }
69  
70      /**
71       * Indicates if there are any more HtmlTag to retrieve.
72       * @return {@code true} if there are more tags.
73       */
74      public boolean hasNextTag() {
75          return !tags.isEmpty();
76      }
77  
78      /**
79       * Performs lazy initialization on the internal tags List
80       * and adds the tag.
81       * @param tag the HtmlTag to add.
82       */
83      private void add(HtmlTag tag) {
84          tags.add(tag);
85      }
86  
87      /**
88       * Parses the text line for any HTML tags and adds them to the internal
89       * List of tags.
90       * @param text the source line to parse.
91       * @param lineNo the source line number.
92       */
93      private void parseTags(String[] text, int lineNo) {
94          final int nLines = text.length;
95          Point position = findChar(text, '<', new Point(0, 0));
96          while (position.getLineNo() < nLines) {
97              // if this is html comment then skip it
98              if (isCommentTag(text, position)) {
99                  position = skipHtmlComment(text, position);
100             }
101             else if (isTag(text, position)) {
102                 position = parseTag(text, lineNo, nLines, position);
103             }
104             else {
105                 position = getNextCharPos(text, position);
106             }
107             position = findChar(text, '<', position);
108         }
109     }
110 
111     /**
112      * Parses the tag and return position after it.
113      * @param text the source line to parse.
114      * @param lineNo the source line number.
115      * @param nLines line length
116      * @param position start position for parsing
117      * @return position after tag
118      */
119     private Point parseTag(String[] text, int lineNo, final int nLines, Point position) {
120         // find end of tag
121         final Point endTag = findChar(text, '>', position);
122         final boolean incompleteTag = endTag.getLineNo() >= nLines;
123         // get tag id (one word)
124         final String tagId;
125 
126         if (incompleteTag) {
127             tagId = "";
128         }
129         else {
130             tagId = getTagId(text, position);
131         }
132         // is this closed tag
133         final boolean closedTag =
134                 endTag.getLineNo() < nLines
135                  && text[endTag.getLineNo()]
136                  .charAt(endTag.getColumnNo() - 1) == '/';
137         // add new tag
138         add(new HtmlTag(tagId,
139                         position.getLineNo() + lineNo,
140                         position.getColumnNo(),
141                         closedTag,
142                         incompleteTag,
143                         text[position.getLineNo()]));
144         return endTag;
145     }
146 
147     /**
148      * Checks if the given position is start one for HTML tag.
149      * @param javadocText text of javadoc comments.
150      * @param pos position to check.
151      * @return {@code true} some HTML tag starts from given position.
152      */
153     private static boolean isTag(String[] javadocText, Point pos) {
154         final int column = pos.getColumnNo() + 1;
155         final String text = javadocText[pos.getLineNo()];
156 
157         //Character.isJavaIdentifier... may not be a valid HTML
158         //identifier but is valid for generics
159         return column >= text.length()
160                 || Character.isJavaIdentifierStart(text.charAt(column))
161                     || text.charAt(column) == '/';
162     }
163 
164     /**
165      * Parse tag id.
166      * @param javadocText text of javadoc comments.
167      * @param tagStart start position of the tag
168      * @return id for given tag
169      */
170     private static String getTagId(String[] javadocText, Point tagStart) {
171         String tagId = "";
172         int column = tagStart.getColumnNo() + 1;
173         String text = javadocText[tagStart.getLineNo()];
174         if (column < text.length()) {
175             if (text.charAt(column) == '/') {
176                 column++;
177             }
178 
179             text = text.substring(column).trim();
180             int position = 0;
181 
182             //Character.isJavaIdentifier... may not be a valid HTML
183             //identifier but is valid for generics
184             while (position < text.length()
185                     && (Character.isJavaIdentifierStart(text.charAt(position))
186                         || Character.isJavaIdentifierPart(text.charAt(position)))) {
187                 position++;
188             }
189 
190             tagId = text.substring(0, position);
191         }
192         return tagId;
193     }
194 
195     /**
196      * If this is a HTML-comments.
197      * @param text text of javadoc comments
198      * @param pos position to check
199      * @return {@code true} if HTML-comments
200      *         starts form given position.
201      */
202     private static boolean isCommentTag(String[] text, Point pos) {
203         return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo());
204     }
205 
206     /**
207      * Skips HTML comments.
208      * @param text text of javadoc comments.
209      * @param fromPoint start position of HTML-comments
210      * @return position after HTML-comments
211      */
212     private static Point skipHtmlComment(String[] text, Point fromPoint) {
213         Point toPoint = fromPoint;
214         toPoint = findChar(text, '>', toPoint);
215         while (toPoint.getLineNo() < text.length && !text[toPoint.getLineNo()]
216                 .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) {
217             toPoint = findChar(text, '>', getNextCharPos(text, toPoint));
218         }
219         return toPoint;
220     }
221 
222     /**
223      * Finds next occurrence of given character.
224      * @param text text to search
225      * @param character character to search
226      * @param from position to start search
227      * @return position of next occurrence of given character
228      */
229     private static Point findChar(String[] text, char character, Point from) {
230         Point curr = new Point(from.getLineNo(), from.getColumnNo());
231         while (curr.getLineNo() < text.length
232                && text[curr.getLineNo()].charAt(curr.getColumnNo()) != character) {
233             curr = getNextCharPos(text, curr);
234         }
235 
236         return curr;
237     }
238 
239     /**
240      * Returns position of next comment character, skips
241      * whitespaces and asterisks.
242      * @param text to search.
243      * @param from location to search from
244      * @return location of the next character.
245      */
246     private static Point getNextCharPos(String[] text, Point from) {
247         int line = from.getLineNo();
248         int column = from.getColumnNo() + 1;
249         while (line < text.length && column >= text[line].length()) {
250             // go to the next line
251             line++;
252             column = 0;
253             if (line < text.length) {
254                 //skip beginning spaces and stars
255                 final String currentLine = text[line];
256                 while (column < currentLine.length()
257                        && (Character.isWhitespace(currentLine.charAt(column))
258                            || currentLine.charAt(column) == '*')) {
259                     column++;
260                     if (column < currentLine.length()
261                         && currentLine.charAt(column - 1) == '*'
262                         && currentLine.charAt(column) == '/') {
263                         // this is end of comment
264                         column = currentLine.length();
265                     }
266                 }
267             }
268         }
269 
270         return new Point(line, column);
271     }
272 
273     /**
274      * Represents current position in the text.
275      */
276     private static final class Point {
277 
278         /** Line number. */
279         private final int lineNo;
280         /** Column number.*/
281         private final int columnNo;
282 
283         /**
284          * Creates new {@code Point} instance.
285          * @param lineNo line number
286          * @param columnNo column number
287          */
288         /* package */ Point(int lineNo, int columnNo) {
289             this.lineNo = lineNo;
290             this.columnNo = columnNo;
291         }
292 
293         /**
294          * Getter for line number.
295          * @return line number of the position.
296          */
297         public int getLineNo() {
298             return lineNo;
299         }
300 
301         /**
302          * Getter for column number.
303          * @return column number of the position.
304          */
305         public int getColumnNo() {
306             return columnNo;
307         }
308 
309     }
310 
311 }