1 package org.apache.maven.doxia.util;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.awt.image.BufferedImage;
23
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.UnsupportedEncodingException;
27
28 import java.net.URL;
29
30 import java.text.ParseException;
31 import java.text.ParsePosition;
32 import java.text.SimpleDateFormat;
33
34 import java.util.Date;
35 import java.util.Locale;
36
37 import javax.imageio.ImageIO;
38
39 import javax.swing.text.MutableAttributeSet;
40
41 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
42
43 /**
44 * General Doxia utility methods. The methods in this class should not assume
45 * any specific Doxia module or document format.
46 *
47 * @author ltheussl
48 * @since 1.1
49 * @version $Id: DoxiaUtils.java 1185112 2011-10-17 11:33:00Z ltheussl $
50 */
51 public class DoxiaUtils
52 {
53 private static final int MINUS_ONE = 0xFF;
54
55 /**
56 * Checks if the given string corresponds to an internal link,
57 * ie it is a link to an anchor within the same document.
58 * If link is not null, then exactly one of the three methods
59 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
60 * {@link #isLocalLink(java.lang.String)} will return true.
61 *
62 * @param link The link to check. Not null.
63 * @return True if the link starts with "#".
64 *
65 * @throws NullPointerException if link is null.
66 *
67 * @see #isExternalLink(String)
68 * @see #isLocalLink(String)
69 */
70 public static boolean isInternalLink( final String link )
71 {
72 return link.startsWith( "#" );
73 }
74
75 /**
76 * Checks if the given string corresponds to an external URI,
77 * ie is not a link within the same document nor a relative link
78 * to another document (a local link) of the same site.
79 * If link is not null, then exactly one of the three methods
80 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
81 * {@link #isLocalLink(java.lang.String)} will return true.
82 *
83 * @param link The link to check. Not null.
84 *
85 * @return True if the link (ignoring case) starts with either "http:/",
86 * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
87 * Note that Windows style separators "\" are not allowed
88 * for URIs, see http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
89 *
90 * @throws NullPointerException if link is null.
91 *
92 * @see #isInternalLink(String)
93 * @see #isLocalLink(String)
94 */
95 public static boolean isExternalLink( final String link )
96 {
97 String text = link.toLowerCase( Locale.ENGLISH );
98
99 return ( text.startsWith( "http:/" ) || text.startsWith( "https:/" )
100 || text.startsWith( "ftp:/" ) || text.startsWith( "mailto:" )
101 || text.startsWith( "file:/" ) || text.contains( "://" ) );
102 }
103
104 /**
105 * Checks if the given string corresponds to a relative link to another document
106 * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
107 * nor an {@link #isExternalLink(String) external} link.
108 * If link is not null, then exactly one of the three methods
109 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
110 * {@link #isLocalLink(java.lang.String)} will return true.
111 *
112 * @param link The link to check. Not null.
113 *
114 * @return True if the link is neither an external nor an internal link.
115 *
116 * @throws NullPointerException if link is null.
117 *
118 * @see #isExternalLink(String)
119 * @see #isInternalLink(String)
120 */
121 public static boolean isLocalLink( final String link )
122 {
123 return ( !isExternalLink( link ) && !isInternalLink( link ) );
124 }
125
126 /**
127 * Construct a valid Doxia id.
128 *
129 * <p>
130 * This method is equivalent to {@link #encodeId(java.lang.String, boolean) encodeId( id, false )}.
131 * </p>
132 *
133 * @param id The id to be encoded.
134 * May be null in which case null is returned.
135 *
136 * @return The trimmed and encoded id, or null if id is null.
137 *
138 * @see #encodeId(java.lang.String, boolean)
139 */
140 public static String encodeId( final String id )
141 {
142 return encodeId( id, false );
143 }
144
145 /**
146 * Construct a valid Doxia id.
147 *
148 * <p>
149 * A valid Doxia id obeys the same constraints as an HTML ID or NAME token.
150 * According to the <a href="http://www.w3.org/TR/html4/types.html#type-name">
151 * HTML 4.01 specification section 6.2 SGML basic types</a>:
152 * </p>
153 * <p>
154 * <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
155 * followed by any number of letters, digits ([0-9]), hyphens ("-"),
156 * underscores ("_"), colons (":"), and periods (".").</i>
157 * </p>
158 * <p>
159 * According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0
160 * section C.8. Fragment Identifiers</a>:
161 * </p>
162 * <p>
163 * <i>When defining fragment identifiers to be backward-compatible, only
164 * strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i>
165 * </p>
166 * <p>
167 * To achieve this we need to convert the <i>id</i> String. Two conversions
168 * are necessary and one is done to get prettier ids:
169 * </p>
170 * <ol>
171 * <li>Remove whitespace at the start and end before starting to process</li>
172 * <li>If the first character is not a letter, prepend the id with the letter 'a'</li>
173 * <li>Any spaces are replaced with an underscore '_'</li>
174 * <li>
175 * Any characters not matching the above pattern are either dropped,
176 * or replaced according to the rules specified in the
177 * <a href="http://www.w3.org/TR/html4/appendix/notes.html#non-ascii-chars">HTML specs</a>.
178 * </li>
179 * </ol>
180 * <p>
181 * For letters, the case is preserved in the conversion.
182 * </p>
183 *
184 * <p>
185 * Here are some examples:
186 * </p>
187 * <pre>
188 * DoxiaUtils.encodeId( null ) = null
189 * DoxiaUtils.encodeId( "" ) = "a"
190 * DoxiaUtils.encodeId( " " ) = "a"
191 * DoxiaUtils.encodeId( " _ " ) = "a_"
192 * DoxiaUtils.encodeId( "1" ) = "a1"
193 * DoxiaUtils.encodeId( "1anchor" ) = "a1anchor"
194 * DoxiaUtils.encodeId( "_anchor" ) = "a_anchor"
195 * DoxiaUtils.encodeId( "a b-c123 " ) = "a_b-c123"
196 * DoxiaUtils.encodeId( " anchor" ) = "anchor"
197 * DoxiaUtils.encodeId( "myAnchor" ) = "myAnchor"
198 * </pre>
199 *
200 * @param id The id to be encoded.
201 * May be null in which case null is returned.
202 * @param chop true if non-ASCII characters should be ignored.
203 * If false, any non-ASCII characters will be replaced as specified above.
204 *
205 * @return The trimmed and encoded id, or null if id is null.
206 * If id is not null, the return value is guaranteed to be a valid Doxia id.
207 *
208 * @see #isValidId(java.lang.String)
209 *
210 * @since 1.1.1
211 */
212 public static String encodeId( final String id, final boolean chop )
213 {
214 if ( id == null )
215 {
216 return null;
217 }
218
219 final String idd = id.trim();
220 int length = idd.length();
221
222 if ( length == 0 )
223 {
224 return "a";
225 }
226
227 StringBuilder buffer = new StringBuilder( length );
228
229 for ( int i = 0; i < length; ++i )
230 {
231 char c = idd.charAt( i );
232
233 if ( ( i == 0 ) && ( !isAsciiLetter( c ) ) )
234 {
235 buffer.append( 'a' );
236 }
237
238 if ( c == ' ' )
239 {
240 buffer.append( '_' );
241 }
242 else if ( isAsciiLetter( c ) || isAsciiDigit( c ) || ( c == '-' ) || ( c == '_' ) || ( c == ':' )
243 || ( c == '.' ) )
244 {
245 buffer.append( c );
246 }
247 else if ( !chop )
248 {
249 byte[] bytes;
250
251 try
252 {
253 bytes = String.valueOf( c ).getBytes( "UTF8" );
254 }
255 catch ( UnsupportedEncodingException cannotHappen )
256 {
257 bytes = new byte[0];
258 }
259
260 for ( int j = 0; j < bytes.length; ++j )
261 {
262 String hex = byteToHex( bytes[j] );
263
264 buffer.append( '%' );
265
266 if ( hex.length() == 1 )
267 {
268 buffer.append( '0' );
269 }
270
271 buffer.append( hex );
272 }
273 }
274 }
275
276 return buffer.toString();
277 }
278
279 /**
280 * Convert a byte to it's hexadecimal equivalent.
281 *
282 * @param b the byte value.
283 * @return the result of Integer.toHexString( b & 0xFF ).
284 *
285 * @since 1.1.1
286 */
287 public static String byteToHex( final byte b )
288 {
289 return Integer.toHexString( b & MINUS_ONE );
290 }
291
292 /**
293 * Determines if the specified text is a valid id according to the rules
294 * laid out in {@link #encodeId(String)}.
295 *
296 * @param text The text to be tested.
297 * May be null in which case false is returned.
298 *
299 * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
300 *
301 * @see #encodeId(String)
302 */
303 public static boolean isValidId( final String text )
304 {
305 if ( text == null || text.length() == 0 )
306 {
307 return false;
308 }
309
310 for ( int i = 0; i < text.length(); ++i )
311 {
312 char c = text.charAt( i );
313
314 if ( isAsciiLetter( c ) )
315 {
316 continue;
317 }
318
319 if ( ( i == 0 ) || ( c == ' ' ) || ( !isAsciiDigit( c ) && c != '-' && c != '_' && c != ':' && c != '.' ) )
320 {
321 return false;
322 }
323 }
324
325 return true;
326 }
327
328 private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat( "", Locale.ENGLISH );
329 private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition( 0 );
330 private static final String[] DATE_PATTERNS = new String[]
331 {
332 "yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
333 "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
334 "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"
335 };
336
337 /**
338 * <p>Parses a string representing a date by trying different date patterns.</p>
339 *
340 * <p>The following date patterns are tried (in the given order):</p>
341 *
342 * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
343 * "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
344 * "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
345 *
346 * <p>A parse is only sucessful if it parses the whole of the input string.
347 * If no parse patterns match, a ParseException is thrown.</p>
348 *
349 * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
350 * (ignoring case) return the current date.</p>
351 *
352 * @param str the date to parse, not null.
353 *
354 * @return the parsed date, or the current date if the input String (ignoring case) was
355 * <code>"today"</code> or <code>"now"</code>.
356 *
357 * @throws ParseException if no pattern matches.
358 * @throws NullPointerException if str is null.
359 *
360 * @since 1.1.1.
361 */
362 public static Date parseDate( final String str )
363 throws ParseException
364 {
365 if ( "today".equalsIgnoreCase( str ) || "now".equalsIgnoreCase( str ) )
366 {
367 return new Date();
368 }
369
370 for ( int i = 0; i < DATE_PATTERNS.length; i++ )
371 {
372 DATE_PARSER.applyPattern( DATE_PATTERNS[i] );
373 DATE_PARSE_POSITION.setIndex( 0 );
374 final Date date = DATE_PARSER.parse( str, DATE_PARSE_POSITION );
375
376 if ( date != null && DATE_PARSE_POSITION.getIndex() == str.length() )
377 {
378 return date;
379 }
380 }
381
382 throw new ParseException( "Unable to parse date: " + str, -1 );
383 }
384
385 //
386 // private
387 //
388
389 private static boolean isAsciiLetter( final char c )
390 {
391 return ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) );
392 }
393
394 private static boolean isAsciiDigit( final char c )
395 {
396 return ( c >= '0' && c <= '9' );
397 }
398
399 /**
400 * Determine width and height of an image. If successful, the returned SinkEventAttributes
401 * contain width and height attribute keys whose values are the width and height of the image (as a String).
402 *
403 * @param logo a String containing either a URL or a path to an image file. Not null.
404 *
405 * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
406 *
407 * @throws java.io.IOException if an error occurs during reading.
408 * @throws NullPointerException if logo is null.
409 *
410 * @since 1.1.1
411 */
412 public static MutableAttributeSet getImageAttributes( final String logo )
413 throws IOException
414 {
415 BufferedImage img = null;
416
417 if ( isExternalLink( logo ) )
418 {
419 img = ImageIO.read( new URL( logo ) );
420 }
421 else
422 {
423 img = ImageIO.read( new File( logo ) );
424 }
425
426 if ( img == null )
427 {
428 return null;
429 }
430
431 MutableAttributeSet atts = new SinkEventAttributeSet();
432 atts.addAttribute( SinkEventAttributeSet.WIDTH, Integer.toString( img.getWidth() ) );
433 atts.addAttribute( SinkEventAttributeSet.HEIGHT, Integer.toString( img.getHeight() ) );
434 // add other attributes?
435
436 return atts;
437 }
438
439 private DoxiaUtils()
440 {
441 // utility class
442 }
443 }