1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.doxia.util;
20
21 import javax.imageio.ImageIO;
22 import javax.swing.text.MutableAttributeSet;
23
24 import java.awt.image.BufferedImage;
25 import java.io.File;
26 import java.io.IOException;
27 import java.net.URL;
28 import java.nio.charset.StandardCharsets;
29 import java.text.ParseException;
30 import java.text.ParsePosition;
31 import java.text.SimpleDateFormat;
32 import java.util.Date;
33 import java.util.Locale;
34
35 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
36
37 /**
38 * General Doxia utility methods. The methods in this class should not assume
39 * any specific Doxia module or document format.
40 *
41 * @author ltheussl
42 * @since 1.1
43 */
44 public class DoxiaUtils {
45 /**
46 * Checks if the given string corresponds to an internal link,
47 * ie it is a link to an anchor within the same document.
48 * If link is not null, then exactly one of the three methods
49 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
50 * {@link #isLocalLink(java.lang.String)} will return true.
51 *
52 * @param link The link to check. Not null.
53 * @return True if the link starts with "#".
54 *
55 * @throws NullPointerException if link is null.
56 * @see #isExternalLink(String)
57 * @see #isLocalLink(String)
58 */
59 public static boolean isInternalLink(final String link) {
60 return link.startsWith("#");
61 }
62
63 /**
64 * Checks if the given string corresponds to an external URI,
65 * ie is not a link within the same document nor a relative link
66 * to another document (a local link) of the same site.
67 * If link is not null, then exactly one of the three methods
68 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
69 * {@link #isLocalLink(java.lang.String)} will return true.
70 *
71 * @param link The link to check. Not null.
72 * @return True if the link (ignoring case) starts with either "http:/",
73 * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
74 * Note that Windows style separators "\" are not allowed
75 * for URIs, see http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
76 *
77 * @throws NullPointerException if link is null.
78 *
79 * @see #isInternalLink(String)
80 * @see #isLocalLink(String)
81 */
82 public static boolean isExternalLink(final String link) {
83 String text = link.toLowerCase(Locale.ENGLISH);
84
85 return (text.startsWith("http:/")
86 || text.startsWith("https:/")
87 || text.startsWith("ftp:/")
88 || text.startsWith("mailto:")
89 || text.startsWith("file:/")
90 || text.contains("://"));
91 }
92
93 /**
94 * Checks if the given string corresponds to a relative link to another document
95 * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
96 * nor an {@link #isExternalLink(String) external} link.
97 * If link is not null, then exactly one of the three methods
98 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
99 * {@link #isLocalLink(java.lang.String)} will return true.
100 *
101 * @param link The link to check. Not null.
102 * @return True if the link is neither an external nor an internal link.
103 *
104 * @throws NullPointerException if link is null.
105 *
106 * @see #isExternalLink(String)
107 * @see #isInternalLink(String)
108 */
109 public static boolean isLocalLink(final String link) {
110 return (!isExternalLink(link) && !isInternalLink(link));
111 }
112
113 /**
114 * Construct a valid Doxia id.
115 *
116 * <p>
117 * A valid Doxia id corresponds to an XML id which is a {code NCName} which is in turn identical
118 * to a <a href="https://www.w3.org/TR/REC-xml/#NT-Name">{@code Name}</a>, but without a colon
119 * and without any character above {@code 0x7F}.
120 * </p>
121 * <p>
122 * To achieve this we need to convert the <i>id</i> String. Two conversions
123 * are necessary and one is done to get prettier ids:
124 * </p>
125 * <ol>
126 * <li>Trim with {@link String#trim()} before starting to process,</li>
127 * <li>if the first character is not a {@code NameStartChar} prepend the letter 'a',</li>
128 * <li>any space character ({@code 0x20}) is replaced with an underscore,</li>
129 * <li>
130 * any character not matching the above pattern is either dropped,
131 * or replaced with its UTF-8 encoding where each byte is prepended with a dot.
132 * </li>
133 * </ol>
134 *
135 * <p>
136 * Here are some examples:
137 * </p>
138 * <pre>
139 * DoxiaUtils.encodeId(null) = null
140 * DoxiaUtils.encodeId("") = null
141 * DoxiaUtils.encodeId(" ") = null
142 * DoxiaUtils.encodeId(" _ ") = "_"
143 * DoxiaUtils.encodeId("1") = "a1"
144 * DoxiaUtils.encodeId("1anchor") = "a1anchor"
145 * DoxiaUtils.encodeId("_anchor") = "_anchor"
146 * DoxiaUtils.encodeId("a b-c123 ") = "a_b-c123"
147 * DoxiaUtils.encodeId(" anchor") = "anchor"
148 * DoxiaUtils.encodeId("myAnchor") = "myAnchor"
149 * DoxiaUtils.encodeId("€") = "a.E2.82.AC"
150 * </pre>
151 *
152 * @param text The text to be encoded.
153 * May be null, empty or blank in which case null is returned.
154 * @return The trimmed and encoded id, or null if id is null.
155 * If id is not null, the return value is guaranteed to be a valid Doxia id.
156 * @see #isValidId(java.lang.String)
157 * @since 1.1.1
158 */
159 public static String encodeId(final String text) {
160 if (text == null) {
161 return null;
162 }
163
164 final String textt = text.trim();
165 int length = textt.length();
166
167 if (length == 0) {
168 return null;
169 }
170
171 StringBuilder buffer = new StringBuilder(length);
172
173 for (int i = 0; i < length; ++i) {
174 char c = textt.charAt(i);
175
176 if ((i == 0) && !(isAsciiLetter(c) || c == '_')) {
177 buffer.append('a');
178 }
179
180 if (c == ' ') {
181 buffer.append('_');
182 } else if (isAsciiLetter(c) || isAsciiDigit(c) || (c == '-') || (c == '_') || (c == '.')) {
183 buffer.append(c);
184 } else {
185
186 byte[] bytes = String.valueOf(c).getBytes(StandardCharsets.UTF_8);
187
188 for (byte aByte : bytes) {
189 buffer.append('.');
190 buffer.append(String.format("%02X", aByte));
191 }
192 }
193 }
194
195 return buffer.toString();
196 }
197
198 /**
199 * Determines if the specified text is a valid id according to the rules
200 * laid out in {@link #encodeId(String)}.
201 *
202 * @param text The id to be tested.
203 * May be null or empty in which case false is returned.
204 * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
205 * @see #encodeId(String)
206 */
207 public static boolean isValidId(final String text) {
208 if (text == null || text.length() == 0) {
209 return false;
210 }
211
212 for (int i = 0; i < text.length(); ++i) {
213 char c = text.charAt(i);
214
215 if (isAsciiLetter(c) || c == '_') {
216 continue;
217 }
218
219 if ((i == 0) || (!isAsciiDigit(c) && c != '-' && c != '.')) {
220 return false;
221 }
222 }
223
224 return true;
225 }
226
227 private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat("", Locale.ENGLISH);
228 private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition(0);
229 private static final String[] DATE_PATTERNS = new String[] {
230 "yyyy-MM-dd",
231 "yyyy/MM/dd",
232 "yyyyMMdd",
233 "yyyy",
234 "dd.MM.yyyy",
235 "dd MMM yyyy",
236 "dd MMM. yyyy",
237 "MMMM yyyy",
238 "MMM. dd, yyyy",
239 "MMM. yyyy",
240 "MMMM dd, yyyy",
241 "MMM d, ''yy",
242 "MMM. ''yy",
243 "MMMM ''yy"
244 };
245
246 /**
247 * <p>Parses a string representing a date by trying different date patterns.</p>
248 *
249 * <p>The following date patterns are tried (in the given order):</p>
250 *
251 * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
252 * "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
253 * "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
254 *
255 * <p>A parse is only sucessful if it parses the whole of the input string.
256 * If no parse patterns match, a ParseException is thrown.</p>
257 *
258 * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
259 * (ignoring case) return the current date.</p>
260 *
261 * @param str the date to parse, not null.
262 * @return the parsed date, or the current date if the input String (ignoring case) was
263 * <code>"today"</code> or <code>"now"</code>.
264 *
265 * @throws ParseException if no pattern matches.
266 * @throws NullPointerException if str is null.
267 * @since 1.1.1.
268 */
269 public static Date parseDate(final String str) throws ParseException {
270 if ("today".equalsIgnoreCase(str) || "now".equalsIgnoreCase(str)) {
271 return new Date();
272 }
273
274 for (String datePattern : DATE_PATTERNS) {
275 DATE_PARSER.applyPattern(datePattern);
276 DATE_PARSE_POSITION.setIndex(0);
277 final Date date = DATE_PARSER.parse(str, DATE_PARSE_POSITION);
278
279 if (date != null && DATE_PARSE_POSITION.getIndex() == str.length()) {
280 return date;
281 }
282 }
283
284 throw new ParseException("Unable to parse date: " + str, -1);
285 }
286
287 //
288 // private
289 //
290
291 private static boolean isAsciiLetter(final char c) {
292 return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));
293 }
294
295 private static boolean isAsciiDigit(final char c) {
296 return (c >= '0' && c <= '9');
297 }
298
299 /**
300 * Determine width and height of an image. If successful, the returned SinkEventAttributes
301 * contain width and height attribute keys whose values are the width and height of the image (as a String).
302 *
303 * @param logo a String containing either a URL or a path to an image file. Not null.
304 * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
305 *
306 * @throws java.io.IOException if an error occurs during reading.
307 * @throws NullPointerException if logo is null.
308 *
309 * @since 1.1.1
310 */
311 public static MutableAttributeSet getImageAttributes(final String logo) throws IOException {
312 BufferedImage img;
313
314 if (isExternalLink(logo)) {
315 img = ImageIO.read(new URL(logo));
316 } else {
317 img = ImageIO.read(new File(logo));
318 }
319
320 if (img == null) {
321 return null;
322 }
323
324 MutableAttributeSet atts = new SinkEventAttributeSet();
325 atts.addAttribute(SinkEventAttributeSet.WIDTH, Integer.toString(img.getWidth()));
326 atts.addAttribute(SinkEventAttributeSet.HEIGHT, Integer.toString(img.getHeight()));
327 // add other attributes?
328
329 return atts;
330 }
331
332 private DoxiaUtils() {
333 // utility class
334 }
335 }