View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.doxia.util;
20  
21  import javax.imageio.ImageIO;
22  import javax.swing.text.MutableAttributeSet;
23  
24  import java.awt.image.BufferedImage;
25  import java.io.File;
26  import java.io.IOException;
27  import java.net.URL;
28  import java.nio.charset.StandardCharsets;
29  import java.text.ParseException;
30  import java.text.ParsePosition;
31  import java.text.SimpleDateFormat;
32  import java.util.Date;
33  import java.util.Locale;
34  
35  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
36  
37  /**
38   * General Doxia utility methods. The methods in this class should not assume
39   * any specific Doxia module or document format.
40   *
41   * @author ltheussl
42   * @since 1.1
43   */
44  public class DoxiaUtils {
45      /**
46       * Checks if the given string corresponds to an internal link,
47       * ie it is a link to an anchor within the same document.
48       * If link is not null, then exactly one of the three methods
49       * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
50       * {@link #isLocalLink(java.lang.String)} will return true.
51       *
52       * @param link The link to check. Not null.
53       * @return True if the link starts with "#".
54       *
55       * @throws NullPointerException if link is null.
56       * @see #isExternalLink(String)
57       * @see #isLocalLink(String)
58       */
59      public static boolean isInternalLink(final String link) {
60          return link.startsWith("#");
61      }
62  
63      /**
64       * Checks if the given string corresponds to an external URI,
65       * ie is not a link within the same document nor a relative link
66       * to another document (a local link) of the same site.
67       * If link is not null, then exactly one of the three methods
68       * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
69       * {@link #isLocalLink(java.lang.String)} will return true.
70       *
71       * @param link The link to check. Not null.
72       * @return True if the link (ignoring case) starts with either "http:/",
73       * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
74       * Note that Windows style separators "\" are not allowed
75       * for URIs, see  http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
76       *
77       * @throws NullPointerException if link is null.
78       *
79       * @see #isInternalLink(String)
80       * @see #isLocalLink(String)
81       */
82      public static boolean isExternalLink(final String link) {
83          String text = link.toLowerCase(Locale.ENGLISH);
84  
85          return (text.startsWith("http:/")
86                  || text.startsWith("https:/")
87                  || text.startsWith("ftp:/")
88                  || text.startsWith("mailto:")
89                  || text.startsWith("file:/")
90                  || text.contains("://"));
91      }
92  
93      /**
94       * Checks if the given string corresponds to a relative link to another document
95       * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
96       * nor an {@link #isExternalLink(String) external} link.
97       * If link is not null, then exactly one of the three methods
98       * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
99       * {@link #isLocalLink(java.lang.String)} will return true.
100      *
101      * @param link The link to check. Not null.
102      * @return True if the link is neither an external nor an internal link.
103      *
104      * @throws NullPointerException if link is null.
105      *
106      * @see #isExternalLink(String)
107      * @see #isInternalLink(String)
108      */
109     public static boolean isLocalLink(final String link) {
110         return (!isExternalLink(link) && !isInternalLink(link));
111     }
112 
113     /**
114      * Construct a valid Doxia id.
115      *
116      * <p>
117      *   A valid Doxia id corresponds to an XML id which is a {code NCName} which is in turn identical
118      *   to a <a href="https://www.w3.org/TR/REC-xml/#NT-Name">{@code Name}</a>, but without a colon
119      *   and without any character above {@code 0x7F}.
120      * </p>
121      * <p>
122      *   To achieve this we need to convert the <i>id</i> String. Two conversions
123      *   are necessary and one is done to get prettier ids:
124      * </p>
125      * <ol>
126      *   <li>Trim with {@link String#trim()} before starting to process,</li>
127      *   <li>if the first character is not a {@code NameStartChar} prepend the letter 'a',</li>
128      *   <li>any space character ({@code 0x20}) is replaced with an underscore,</li>
129      *   <li>
130      *     any character not matching the above pattern is either dropped,
131      *     or replaced with its UTF-8 encoding where each byte is prepended with a dot.
132      *   </li>
133      * </ol>
134      *
135      * <p>
136      * Here are some examples:
137      * </p>
138      * <pre>
139      * DoxiaUtils.encodeId(null)        = null
140      * DoxiaUtils.encodeId("")          = null
141      * DoxiaUtils.encodeId("  ")        = null
142      * DoxiaUtils.encodeId(" _ ")       = "_"
143      * DoxiaUtils.encodeId("1")         = "a1"
144      * DoxiaUtils.encodeId("1anchor")   = "a1anchor"
145      * DoxiaUtils.encodeId("_anchor")   = "_anchor"
146      * DoxiaUtils.encodeId("a b-c123 ") = "a_b-c123"
147      * DoxiaUtils.encodeId("   anchor") = "anchor"
148      * DoxiaUtils.encodeId("myAnchor")  = "myAnchor"
149      * DoxiaUtils.encodeId("€")         = "a.E2.82.AC"
150      * </pre>
151      *
152      * @param text The text to be encoded.
153      *      May be null, empty or blank in which case null is returned.
154      * @return The trimmed and encoded id, or null if id is null.
155      * If id is not null, the return value is guaranteed to be a valid Doxia id.
156      * @see #isValidId(java.lang.String)
157      * @since 1.1.1
158      */
159     public static String encodeId(final String text) {
160         if (text == null) {
161             return null;
162         }
163 
164         final String textt = text.trim();
165         int length = textt.length();
166 
167         if (length == 0) {
168             return null;
169         }
170 
171         StringBuilder buffer = new StringBuilder(length);
172 
173         for (int i = 0; i < length; ++i) {
174             char c = textt.charAt(i);
175 
176             if ((i == 0) && !(isAsciiLetter(c) || c == '_')) {
177                 buffer.append('a');
178             }
179 
180             if (c == ' ') {
181                 buffer.append('_');
182             } else if (isAsciiLetter(c) || isAsciiDigit(c) || (c == '-') || (c == '_') || (c == '.')) {
183                 buffer.append(c);
184             } else {
185 
186                 byte[] bytes = String.valueOf(c).getBytes(StandardCharsets.UTF_8);
187 
188                 for (byte aByte : bytes) {
189                     buffer.append('.');
190                     buffer.append(String.format("%02X", aByte));
191                 }
192             }
193         }
194 
195         return buffer.toString();
196     }
197 
198     /**
199      * Determines if the specified text is a valid id according to the rules
200      * laid out in {@link #encodeId(String)}.
201      *
202      * @param text The id to be tested.
203      *      May be null or empty in which case false is returned.
204      * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
205      * @see #encodeId(String)
206      */
207     public static boolean isValidId(final String text) {
208         if (text == null || text.length() == 0) {
209             return false;
210         }
211 
212         for (int i = 0; i < text.length(); ++i) {
213             char c = text.charAt(i);
214 
215             if (isAsciiLetter(c) || c == '_') {
216                 continue;
217             }
218 
219             if ((i == 0) || (!isAsciiDigit(c) && c != '-' && c != '.')) {
220                 return false;
221             }
222         }
223 
224         return true;
225     }
226 
227     private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat("", Locale.ENGLISH);
228     private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition(0);
229     private static final String[] DATE_PATTERNS = new String[] {
230         "yyyy-MM-dd",
231         "yyyy/MM/dd",
232         "yyyyMMdd",
233         "yyyy",
234         "dd.MM.yyyy",
235         "dd MMM yyyy",
236         "dd MMM. yyyy",
237         "MMMM yyyy",
238         "MMM. dd, yyyy",
239         "MMM. yyyy",
240         "MMMM dd, yyyy",
241         "MMM d, ''yy",
242         "MMM. ''yy",
243         "MMMM ''yy"
244     };
245 
246     /**
247      * <p>Parses a string representing a date by trying different date patterns.</p>
248      *
249      * <p>The following date patterns are tried (in the given order):</p>
250      *
251      * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
252      *  "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
253      *  "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
254      *
255      * <p>A parse is only sucessful if it parses the whole of the input string.
256      * If no parse patterns match, a ParseException is thrown.</p>
257      *
258      * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
259      * (ignoring case) return the current date.</p>
260      *
261      * @param str the date to parse, not null.
262      * @return the parsed date, or the current date if the input String (ignoring case) was
263      *      <code>"today"</code> or <code>"now"</code>.
264      *
265      * @throws ParseException if no pattern matches.
266      * @throws NullPointerException if str is null.
267      * @since 1.1.1.
268      */
269     public static Date parseDate(final String str) throws ParseException {
270         if ("today".equalsIgnoreCase(str) || "now".equalsIgnoreCase(str)) {
271             return new Date();
272         }
273 
274         for (String datePattern : DATE_PATTERNS) {
275             DATE_PARSER.applyPattern(datePattern);
276             DATE_PARSE_POSITION.setIndex(0);
277             final Date date = DATE_PARSER.parse(str, DATE_PARSE_POSITION);
278 
279             if (date != null && DATE_PARSE_POSITION.getIndex() == str.length()) {
280                 return date;
281             }
282         }
283 
284         throw new ParseException("Unable to parse date: " + str, -1);
285     }
286 
287     //
288     // private
289     //
290 
291     private static boolean isAsciiLetter(final char c) {
292         return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));
293     }
294 
295     private static boolean isAsciiDigit(final char c) {
296         return (c >= '0' && c <= '9');
297     }
298 
299     /**
300      * Determine width and height of an image. If successful, the returned SinkEventAttributes
301      * contain width and height attribute keys whose values are the width and height of the image (as a String).
302      *
303      * @param logo a String containing either a URL or a path to an image file. Not null.
304      * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
305      *
306      * @throws java.io.IOException if an error occurs during reading.
307      * @throws NullPointerException if logo is null.
308      *
309      * @since 1.1.1
310      */
311     public static MutableAttributeSet getImageAttributes(final String logo) throws IOException {
312         BufferedImage img;
313 
314         if (isExternalLink(logo)) {
315             img = ImageIO.read(new URL(logo));
316         } else {
317             img = ImageIO.read(new File(logo));
318         }
319 
320         if (img == null) {
321             return null;
322         }
323 
324         MutableAttributeSet atts = new SinkEventAttributeSet();
325         atts.addAttribute(SinkEventAttributeSet.WIDTH, Integer.toString(img.getWidth()));
326         atts.addAttribute(SinkEventAttributeSet.HEIGHT, Integer.toString(img.getHeight()));
327         // add other attributes?
328 
329         return atts;
330     }
331 
332     private DoxiaUtils() {
333         // utility class
334     }
335 }