1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import java.util.List;
23 import java.util.Map;
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26
27 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
28 import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
29 import com.puppycrawl.tools.checkstyle.api.DetailAST;
30 import com.puppycrawl.tools.checkstyle.api.TextBlock;
31 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
32 import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111 @FileStatefulCheck
112 public class AvoidEscapedUnicodeCharactersCheck
113 extends AbstractCheck {
114
115
116
117
118
119 public static final String MSG_KEY = "forbid.escaped.unicode.char";
120
121
122 private static final Pattern UNICODE_REGEXP = Pattern.compile("\\\\u[a-fA-F0-9]{4}");
123
124
125
126
127
128
129
130 private static final Pattern UNICODE_CONTROL = Pattern.compile("\\\\[uU]"
131 + "(00[0-1][0-9A-Fa-f]"
132 + "|00[8-9][0-9A-Fa-f]"
133 + "|00[aA][dD]"
134 + "|034[fF]"
135 + "|070[fF]"
136 + "|180[eE]"
137 + "|200[b-fB-F]"
138 + "|202[a-eA-E]"
139 + "|206[0-4a-fA-F]"
140 + "|[fF]{3}[9a-bA-B]"
141 + "|[fF][eE][fF]{2})");
142
143
144 private static final Pattern ALL_ESCAPED_CHARS = Pattern.compile("^((\\\\u)[a-fA-F0-9]{4}"
145 + "|\""
146 + "|\'"
147 + "|\\\\"
148 + "|\\\\b"
149 + "|\\\\f"
150 + "|\\\\n"
151 + "|\\\\r"
152 + "|\\\\t"
153 + ")+$");
154
155
156 private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\");
157
158
159 private static final Pattern NON_PRINTABLE_CHARS = Pattern.compile("\\\\u0000"
160 + "|\\\\u0009"
161 + "|\\\\u000[bB]"
162 + "|\\\\u000[cC]"
163 + "|\\\\u0020"
164 + "|\\\\u007[fF]"
165 + "|\\\\u0085"
166 + "|\\\\u009[fF]"
167 + "|\\\\u00[aA]0"
168 + "|\\\\u00[aA][dD]"
169 + "|\\\\u04[fF]9"
170 + "|\\\\u05[bB][eE]"
171 + "|\\\\u05[dD]0"
172 + "|\\\\u05[eE][aA]"
173 + "|\\\\u05[fF]3"
174 + "|\\\\u05[fF]4"
175 + "|\\\\u0600"
176 + "|\\\\u0604"
177 + "|\\\\u061[cC]"
178 + "|\\\\u06[dD]{2}"
179 + "|\\\\u06[fF]{2}"
180 + "|\\\\u070[fF]"
181 + "|\\\\u0750"
182 + "|\\\\u077[fF]"
183 + "|\\\\u0[eE]00"
184 + "|\\\\u0[eE]7[fF]"
185 + "|\\\\u1680"
186 + "|\\\\u180[eE]"
187 + "|\\\\u1[eE]00"
188 + "|\\\\u2000"
189 + "|\\\\u2001"
190 + "|\\\\u2002"
191 + "|\\\\u2003"
192 + "|\\\\u2004"
193 + "|\\\\u2005"
194 + "|\\\\u2006"
195 + "|\\\\u2007"
196 + "|\\\\u2008"
197 + "|\\\\u2009"
198 + "|\\\\u200[aA]"
199 + "|\\\\u200[fF]"
200 + "|\\\\u2025"
201 + "|\\\\u2028"
202 + "|\\\\u2029"
203 + "|\\\\u202[fF]"
204 + "|\\\\u205[fF]"
205 + "|\\\\u2064"
206 + "|\\\\u2066"
207 + "|\\\\u2067"
208 + "|\\\\u2068"
209 + "|\\\\u2069"
210 + "|\\\\u206[aA]"
211 + "|\\\\u206[fF]"
212 + "|\\\\u20[aA][fF]"
213 + "|\\\\u2100"
214 + "|\\\\u213[aA]"
215 + "|\\\\u3000"
216 + "|\\\\u[dD]800"
217 + "|\\\\u[fF]8[fF]{2}"
218 + "|\\\\u[fF][bB]50"
219 + "|\\\\u[fF][dD][fF]{2}"
220 + "|\\\\u[fF][eE]70"
221 + "|\\\\u[fF][eE][fF]{2}"
222 + "|\\\\u[fF]{2}0[eE]"
223 + "|\\\\u[fF]{2}61"
224 + "|\\\\u[fF]{2}[dD][cC]"
225 + "|\\\\u[fF]{3}9"
226 + "|\\\\u[fF]{3}[aA]"
227 + "|\\\\u[fF]{3}[bB]"
228 + "|\\\\u[fF]{4}");
229
230
231 private Map<Integer, TextBlock> singlelineComments;
232
233 private Map<Integer, List<TextBlock>> blockComments;
234
235
236 private boolean allowEscapesForControlCharacters;
237
238
239 private boolean allowByTailComment;
240
241
242 private boolean allowIfAllCharactersEscaped;
243
244
245 private boolean allowNonPrintableEscapes;
246
247
248
249
250
251 public final void setAllowEscapesForControlCharacters(boolean allow) {
252 allowEscapesForControlCharacters = allow;
253 }
254
255
256
257
258
259 public final void setAllowByTailComment(boolean allow) {
260 allowByTailComment = allow;
261 }
262
263
264
265
266
267 public final void setAllowIfAllCharactersEscaped(boolean allow) {
268 allowIfAllCharactersEscaped = allow;
269 }
270
271
272
273
274
275 public final void setAllowNonPrintableEscapes(boolean allow) {
276 allowNonPrintableEscapes = allow;
277 }
278
279 @Override
280 public int[] getDefaultTokens() {
281 return getRequiredTokens();
282 }
283
284 @Override
285 public int[] getAcceptableTokens() {
286 return getRequiredTokens();
287 }
288
289 @Override
290 public int[] getRequiredTokens() {
291 return new int[] {TokenTypes.STRING_LITERAL, TokenTypes.CHAR_LITERAL};
292 }
293
294 @Override
295 public void beginTree(DetailAST rootAST) {
296 singlelineComments = getFileContents().getSingleLineComments();
297 blockComments = getFileContents().getBlockComments();
298 }
299
300 @Override
301 public void visitToken(DetailAST ast) {
302 final String literal = ast.getText();
303
304 if (hasUnicodeChar(literal) && !(allowByTailComment && hasTrailComment(ast)
305 || isAllCharactersEscaped(literal)
306 || allowEscapesForControlCharacters
307 && isOnlyUnicodeValidChars(literal, UNICODE_CONTROL)
308 || allowNonPrintableEscapes
309 && isOnlyUnicodeValidChars(literal, NON_PRINTABLE_CHARS))) {
310 log(ast.getLineNo(), MSG_KEY);
311 }
312 }
313
314
315
316
317
318
319 private static boolean hasUnicodeChar(String literal) {
320 final String literalWithoutEscapedBackslashes =
321 ESCAPED_BACKSLASH.matcher(literal).replaceAll("");
322 return UNICODE_REGEXP.matcher(literalWithoutEscapedBackslashes).find();
323 }
324
325
326
327
328
329
330
331 private static boolean isOnlyUnicodeValidChars(String literal, Pattern pattern) {
332 final int unicodeMatchesCounter =
333 countMatches(UNICODE_REGEXP, literal);
334 final int unicodeValidMatchesCounter =
335 countMatches(pattern, literal);
336 return unicodeMatchesCounter - unicodeValidMatchesCounter == 0;
337 }
338
339
340
341
342
343
344 private boolean hasTrailComment(DetailAST ast) {
345 boolean result = false;
346 final int lineNo = ast.getLineNo();
347 if (singlelineComments.containsKey(lineNo)) {
348 result = true;
349 }
350 else {
351 final List<TextBlock> commentList = blockComments.get(lineNo);
352 if (commentList != null) {
353 final TextBlock comment = commentList.get(commentList.size() - 1);
354 final String line = getLines()[lineNo - 1];
355 result = isTrailingBlockComment(comment, line);
356 }
357 }
358 return result;
359 }
360
361
362
363
364
365
366
367 private static boolean isTrailingBlockComment(TextBlock comment, String line) {
368 return comment.getText().length != 1
369 || CommonUtil.isBlank(line.substring(comment.getEndColNo() + 1));
370 }
371
372
373
374
375
376
377
378 private static int countMatches(Pattern pattern, String target) {
379 int matcherCounter = 0;
380 final Matcher matcher = pattern.matcher(target);
381 while (matcher.find()) {
382 matcherCounter++;
383 }
384 return matcherCounter;
385 }
386
387
388
389
390
391
392 private boolean isAllCharactersEscaped(String literal) {
393 return allowIfAllCharactersEscaped
394 && ALL_ESCAPED_CHARS.matcher(literal.substring(1,
395 literal.length() - 1)).find();
396 }
397
398 }