View Javadoc

1   package org.apache.maven.doxia.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.awt.image.BufferedImage;
23  
24  import java.io.File;
25  import java.io.IOException;
26  import java.io.UnsupportedEncodingException;
27  
28  import java.net.URL;
29  
30  import java.text.ParseException;
31  import java.text.ParsePosition;
32  import java.text.SimpleDateFormat;
33  
34  import java.util.Date;
35  import java.util.Locale;
36  
37  import javax.imageio.ImageIO;
38  
39  import javax.swing.text.MutableAttributeSet;
40  
41  import org.apache.maven.doxia.sink.SinkEventAttributeSet;
42  
43  /**
44   * General Doxia utility methods. The methods in this class should not assume
45   * any specific Doxia module or document format.
46   *
47   * @author ltheussl
48   * @since 1.1
49   * @version $Id: DoxiaUtils.java 1185112 2011-10-17 11:33:00Z ltheussl $
50   */
51  public class DoxiaUtils
52  {
53      private static final int MINUS_ONE = 0xFF;
54  
55      /**
56       * Checks if the given string corresponds to an internal link,
57       * ie it is a link to an anchor within the same document.
58       * If link is not null, then exactly one of the three methods
59       * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
60       * {@link #isLocalLink(java.lang.String)} will return true.
61       *
62       * @param link The link to check. Not null.
63       * @return True if the link starts with "#".
64       *
65       * @throws NullPointerException if link is null.
66       *
67       * @see #isExternalLink(String)
68       * @see #isLocalLink(String)
69       */
70      public static boolean isInternalLink( final String link )
71      {
72          return link.startsWith( "#" );
73      }
74  
75      /**
76       * Checks if the given string corresponds to an external URI,
77       * ie is not a link within the same document nor a relative link
78       * to another document (a local link) of the same site.
79       * If link is not null, then exactly one of the three methods
80       * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
81       * {@link #isLocalLink(java.lang.String)} will return true.
82       *
83       * @param link The link to check. Not null.
84       *
85       * @return True if the link (ignoring case) starts with either "http:/",
86       * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
87       * Note that Windows style separators "\" are not allowed
88       * for URIs, see  http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
89       *
90       * @throws NullPointerException if link is null.
91       *
92       * @see #isInternalLink(String)
93       * @see #isLocalLink(String)
94       */
95      public static boolean isExternalLink( final String link )
96      {
97          String text = link.toLowerCase( Locale.ENGLISH );
98  
99          return ( text.startsWith( "http:/" ) || text.startsWith( "https:/" )
100             || text.startsWith( "ftp:/" ) || text.startsWith( "mailto:" )
101             || text.startsWith( "file:/" ) || text.contains( "://" ) );
102     }
103 
104     /**
105      * Checks if the given string corresponds to a relative link to another document
106      * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
107      * nor an {@link #isExternalLink(String) external} link.
108      * If link is not null, then exactly one of the three methods
109      * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
110      * {@link #isLocalLink(java.lang.String)} will return true.
111      *
112      * @param link The link to check. Not null.
113      *
114      * @return True if the link is neither an external nor an internal link.
115      *
116      * @throws NullPointerException if link is null.
117      *
118      * @see #isExternalLink(String)
119      * @see #isInternalLink(String)
120      */
121     public static boolean isLocalLink( final String link )
122     {
123         return ( !isExternalLink( link ) && !isInternalLink( link ) );
124     }
125 
126     /**
127      * Construct a valid Doxia id.
128      *
129      * <p>
130      *   This method is equivalent to {@link #encodeId(java.lang.String, boolean) encodeId( id, false )}.
131      * </p>
132      *
133      * @param id The id to be encoded.
134      *      May be null in which case null is returned.
135      *
136      * @return The trimmed and encoded id, or null if id is null.
137      *
138      * @see #encodeId(java.lang.String, boolean)
139      */
140     public static String encodeId( final String id )
141     {
142         return encodeId( id, false );
143     }
144 
145     /**
146      * Construct a valid Doxia id.
147      *
148      * <p>
149      *   A valid Doxia id obeys the same constraints as an HTML ID or NAME token.
150      *   According to the <a href="http://www.w3.org/TR/html4/types.html#type-name">
151      *   HTML 4.01 specification section 6.2 SGML basic types</a>:
152      * </p>
153      * <p>
154      *   <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
155      *   followed by any number of letters, digits ([0-9]), hyphens ("-"),
156      *   underscores ("_"), colons (":"), and periods (".").</i>
157      * </p>
158      * <p>
159      *   According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0
160      *   section C.8. Fragment Identifiers</a>:
161      * </p>
162      * <p>
163      *   <i>When defining fragment identifiers to be backward-compatible, only
164      *   strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i>
165      * </p>
166      * <p>
167      *   To achieve this we need to convert the <i>id</i> String. Two conversions
168      *   are necessary and one is done to get prettier ids:
169      * </p>
170      * <ol>
171      *   <li>Remove whitespace at the start and end before starting to process</li>
172      *   <li>If the first character is not a letter, prepend the id with the letter 'a'</li>
173      *   <li>Any spaces are replaced with an underscore '_'</li>
174      *   <li>
175      *     Any characters not matching the above pattern are either dropped,
176      *     or replaced according to the rules specified in the
177      *     <a href="http://www.w3.org/TR/html4/appendix/notes.html#non-ascii-chars">HTML specs</a>.
178      *   </li>
179      * </ol>
180      * <p>
181      *   For letters, the case is preserved in the conversion.
182      * </p>
183      *
184      * <p>
185      * Here are some examples:
186      * </p>
187      * <pre>
188      * DoxiaUtils.encodeId( null )        = null
189      * DoxiaUtils.encodeId( "" )          = "a"
190      * DoxiaUtils.encodeId( "  " )        = "a"
191      * DoxiaUtils.encodeId( " _ " )       = "a_"
192      * DoxiaUtils.encodeId( "1" )         = "a1"
193      * DoxiaUtils.encodeId( "1anchor" )   = "a1anchor"
194      * DoxiaUtils.encodeId( "_anchor" )   = "a_anchor"
195      * DoxiaUtils.encodeId( "a b-c123 " ) = "a_b-c123"
196      * DoxiaUtils.encodeId( "   anchor" ) = "anchor"
197      * DoxiaUtils.encodeId( "myAnchor" )  = "myAnchor"
198      * </pre>
199      *
200      * @param id The id to be encoded.
201      *      May be null in which case null is returned.
202      * @param chop true if non-ASCII characters should be ignored.
203      * If false, any non-ASCII characters will be replaced as specified above.
204      *
205      * @return The trimmed and encoded id, or null if id is null.
206      * If id is not null, the return value is guaranteed to be a valid Doxia id.
207      *
208      * @see #isValidId(java.lang.String)
209      *
210      * @since 1.1.1
211      */
212     public static String encodeId( final String id, final boolean chop )
213     {
214         if ( id == null )
215         {
216             return null;
217         }
218 
219         final String idd = id.trim();
220         int length = idd.length();
221 
222         if ( length == 0 )
223         {
224             return "a";
225         }
226 
227         StringBuilder buffer = new StringBuilder( length );
228 
229         for ( int i = 0; i < length; ++i )
230         {
231             char c = idd.charAt( i );
232 
233             if ( ( i == 0 ) && ( !isAsciiLetter( c ) ) )
234             {
235                 buffer.append( 'a' );
236             }
237 
238             if ( c == ' ' )
239             {
240                 buffer.append( '_' );
241             }
242             else if ( isAsciiLetter( c ) || isAsciiDigit( c ) || ( c == '-' ) || ( c == '_' ) || ( c == ':' )
243                             || ( c == '.' ) )
244             {
245                 buffer.append( c );
246             }
247             else if ( !chop )
248             {
249                 byte[] bytes;
250 
251                 try
252                 {
253                     bytes = String.valueOf( c ).getBytes( "UTF8" );
254                 }
255                 catch ( UnsupportedEncodingException cannotHappen )
256                 {
257                     bytes = new byte[0];
258                 }
259 
260                 for ( int j = 0; j < bytes.length; ++j )
261                 {
262                     String hex = byteToHex( bytes[j] );
263 
264                     buffer.append( '%' );
265 
266                     if ( hex.length() == 1 )
267                     {
268                         buffer.append( '0' );
269                     }
270 
271                     buffer.append( hex );
272                 }
273             }
274         }
275 
276         return buffer.toString();
277     }
278 
279     /**
280      * Convert a byte to it's hexadecimal equivalent.
281      *
282      * @param b the byte value.
283      * @return the result of Integer.toHexString( b & 0xFF ).
284      *
285      * @since 1.1.1
286      */
287     public static String byteToHex( final byte b )
288     {
289         return Integer.toHexString( b & MINUS_ONE );
290     }
291 
292     /**
293      * Determines if the specified text is a valid id according to the rules
294      * laid out in {@link #encodeId(String)}.
295      *
296      * @param text The text to be tested.
297      *      May be null in which case false is returned.
298      *
299      * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
300      *
301      * @see #encodeId(String)
302      */
303     public static boolean isValidId( final String text )
304     {
305         if ( text == null || text.length() == 0 )
306         {
307             return false;
308         }
309 
310         for ( int i = 0; i < text.length(); ++i )
311         {
312             char c = text.charAt( i );
313 
314             if ( isAsciiLetter( c ) )
315             {
316                 continue;
317             }
318 
319             if ( ( i == 0 ) || ( c == ' ' ) || ( !isAsciiDigit( c ) && c != '-' && c != '_' && c != ':' && c != '.' ) )
320             {
321                 return false;
322             }
323         }
324 
325         return true;
326     }
327 
328     private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat( "", Locale.ENGLISH );
329     private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition( 0 );
330     private static final String[] DATE_PATTERNS = new String[]
331     {
332         "yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
333         "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
334         "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"
335     };
336 
337     /**
338      * <p>Parses a string representing a date by trying different date patterns.</p>
339      *
340      * <p>The following date patterns are tried (in the given order):</p>
341      *
342      * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
343      *  "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
344      *  "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
345      *
346      * <p>A parse is only sucessful if it parses the whole of the input string.
347      * If no parse patterns match, a ParseException is thrown.</p>
348      *
349      * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
350      * (ignoring case) return the current date.</p>
351      *
352      * @param str the date to parse, not null.
353      *
354      * @return the parsed date, or the current date if the input String (ignoring case) was
355      *      <code>"today"</code> or <code>"now"</code>.
356      *
357      * @throws ParseException if no pattern matches.
358      * @throws NullPointerException if str is null.
359      *
360      * @since 1.1.1.
361      */
362     public static Date parseDate( final String str )
363             throws ParseException
364     {
365         if ( "today".equalsIgnoreCase( str ) || "now".equalsIgnoreCase( str ) )
366         {
367             return new Date();
368         }
369 
370         for ( int i = 0; i < DATE_PATTERNS.length; i++ )
371         {
372             DATE_PARSER.applyPattern( DATE_PATTERNS[i] );
373             DATE_PARSE_POSITION.setIndex( 0 );
374             final Date date = DATE_PARSER.parse( str, DATE_PARSE_POSITION );
375 
376             if ( date != null && DATE_PARSE_POSITION.getIndex() == str.length() )
377             {
378                 return date;
379             }
380         }
381 
382         throw new ParseException( "Unable to parse date: " + str, -1 );
383     }
384 
385       //
386      // private
387     //
388 
389     private static boolean isAsciiLetter( final char c )
390     {
391         return ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) );
392     }
393 
394     private static boolean isAsciiDigit( final char c )
395     {
396         return ( c >= '0' && c <= '9' );
397     }
398 
399     /**
400      * Determine width and height of an image. If successful, the returned SinkEventAttributes
401      * contain width and height attribute keys whose values are the width and height of the image (as a String).
402      *
403      * @param logo a String containing either a URL or a path to an image file. Not null.
404      *
405      * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
406      *
407      * @throws java.io.IOException if an error occurs during reading.
408      * @throws NullPointerException if logo is null.
409      *
410      * @since 1.1.1
411      */
412     public static MutableAttributeSet getImageAttributes( final String logo )
413             throws IOException
414     {
415         BufferedImage img = null;
416 
417         if ( isExternalLink( logo ) )
418         {
419             img = ImageIO.read( new URL( logo ) );
420         }
421         else
422         {
423             img = ImageIO.read( new File( logo ) );
424         }
425 
426         if ( img == null )
427         {
428             return null;
429         }
430 
431         MutableAttributeSet atts = new SinkEventAttributeSet();
432         atts.addAttribute( SinkEventAttributeSet.WIDTH, Integer.toString( img.getWidth() ) );
433         atts.addAttribute( SinkEventAttributeSet.HEIGHT, Integer.toString( img.getHeight() ) );
434         // add other attributes?
435 
436         return atts;
437     }
438 
439     private DoxiaUtils()
440     {
441         // utility class
442     }
443 }