1 package org.apache.maven.doxia.util; 2 3 /* 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 */ 21 22 import java.awt.image.BufferedImage; 23 24 import java.io.File; 25 import java.io.IOException; 26 27 import java.net.URL; 28 29 import java.nio.charset.StandardCharsets; 30 import java.text.ParseException; 31 import java.text.ParsePosition; 32 import java.text.SimpleDateFormat; 33 34 import java.util.Date; 35 import java.util.Locale; 36 37 import javax.imageio.ImageIO; 38 39 import javax.swing.text.MutableAttributeSet; 40 41 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; 42 43 /** 44 * General Doxia utility methods. The methods in this class should not assume 45 * any specific Doxia module or document format. 46 * 47 * @author ltheussl 48 * @since 1.1 49 */ 50 public class DoxiaUtils 51 { 52 /** 53 * Checks if the given string corresponds to an internal link, 54 * ie it is a link to an anchor within the same document. 55 * If link is not null, then exactly one of the three methods 56 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 57 * {@link #isLocalLink(java.lang.String)} will return true. 58 * 59 * @param link The link to check. Not null. 60 * @return True if the link starts with "#". 61 * 62 * @throws NullPointerException if link is null. 63 * @see #isExternalLink(String) 64 * @see #isLocalLink(String) 65 */ 66 public static boolean isInternalLink( final String link ) 67 { 68 return link.startsWith( "#" ); 69 } 70 71 /** 72 * Checks if the given string corresponds to an external URI, 73 * ie is not a link within the same document nor a relative link 74 * to another document (a local link) of the same site. 75 * If link is not null, then exactly one of the three methods 76 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 77 * {@link #isLocalLink(java.lang.String)} will return true. 78 * 79 * @param link The link to check. Not null. 80 * @return True if the link (ignoring case) starts with either "http:/", 81 * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://". 82 * Note that Windows style separators "\" are not allowed 83 * for URIs, see http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3. 84 * 85 * @throws NullPointerException if link is null. 86 * 87 * @see #isInternalLink(String) 88 * @see #isLocalLink(String) 89 */ 90 public static boolean isExternalLink( final String link ) 91 { 92 String text = link.toLowerCase( Locale.ENGLISH ); 93 94 return ( text.startsWith( "http:/" ) || text.startsWith( "https:/" ) 95 || text.startsWith( "ftp:/" ) || text.startsWith( "mailto:" ) 96 || text.startsWith( "file:/" ) || text.contains( "://" ) ); 97 } 98 99 /** 100 * Checks if the given string corresponds to a relative link to another document 101 * within the same site, ie it is neither an {@link #isInternalLink(String) internal} 102 * nor an {@link #isExternalLink(String) external} link. 103 * If link is not null, then exactly one of the three methods 104 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 105 * {@link #isLocalLink(java.lang.String)} will return true. 106 * 107 * @param link The link to check. Not null. 108 * @return True if the link is neither an external nor an internal link. 109 * 110 * @throws NullPointerException if link is null. 111 * 112 * @see #isExternalLink(String) 113 * @see #isInternalLink(String) 114 */ 115 public static boolean isLocalLink( final String link ) 116 { 117 return ( !isExternalLink( link ) && !isInternalLink( link ) ); 118 } 119 120 /** 121 * Construct a valid Doxia id. 122 * 123 * <p> 124 * This method is equivalent to {@link #encodeId(java.lang.String, boolean) encodeId( id, false )}. 125 * </p> 126 * 127 * @param id The id to be encoded. 128 * May be null in which case null is returned. 129 * @return The trimmed and encoded id, or null if id is null. 130 * @see #encodeId(java.lang.String, boolean) 131 */ 132 public static String encodeId( final String id ) 133 { 134 return encodeId( id, false ); 135 } 136 137 /** 138 * Construct a valid Doxia id. 139 * 140 * <p> 141 * A valid Doxia id obeys the same constraints as an HTML ID or NAME token. 142 * According to the <a href="http://www.w3.org/TR/html4/types.html#type-name"> 143 * HTML 4.01 specification section 6.2 SGML basic types</a>: 144 * </p> 145 * <p> 146 * <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be 147 * followed by any number of letters, digits ([0-9]), hyphens ("-"), 148 * underscores ("_"), colons (":"), and periods (".").</i> 149 * </p> 150 * <p> 151 * According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0 152 * section C.8. Fragment Identifiers</a>: 153 * </p> 154 * <p> 155 * <i>When defining fragment identifiers to be backward-compatible, only 156 * strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i> 157 * </p> 158 * <p> 159 * To achieve this we need to convert the <i>id</i> String. Two conversions 160 * are necessary and one is done to get prettier ids: 161 * </p> 162 * <ol> 163 * <li>Remove whitespace at the start and end before starting to process</li> 164 * <li>If the first character is not a letter, prepend the id with the letter 'a'</li> 165 * <li>Any spaces are replaced with an underscore '_'</li> 166 * <li> 167 * Any characters not matching the above pattern are either dropped, 168 * or replaced according to the rules specified in the 169 * <a href="http://www.w3.org/TR/html4/appendix/notes.html#non-ascii-chars">HTML specs</a>. 170 * </li> 171 * </ol> 172 * <p> 173 * For letters, the case is preserved in the conversion. 174 * </p> 175 * 176 * <p> 177 * Here are some examples: 178 * </p> 179 * <pre> 180 * DoxiaUtils.encodeId( null ) = null 181 * DoxiaUtils.encodeId( "" ) = "a" 182 * DoxiaUtils.encodeId( " " ) = "a" 183 * DoxiaUtils.encodeId( " _ " ) = "a_" 184 * DoxiaUtils.encodeId( "1" ) = "a1" 185 * DoxiaUtils.encodeId( "1anchor" ) = "a1anchor" 186 * DoxiaUtils.encodeId( "_anchor" ) = "a_anchor" 187 * DoxiaUtils.encodeId( "a b-c123 " ) = "a_b-c123" 188 * DoxiaUtils.encodeId( " anchor" ) = "anchor" 189 * DoxiaUtils.encodeId( "myAnchor" ) = "myAnchor" 190 * </pre> 191 * 192 * @param id The id to be encoded. 193 * May be null in which case null is returned. 194 * @param chop true if non-ASCII characters should be ignored. 195 * If false, any non-ASCII characters will be replaced as specified above. 196 * @return The trimmed and encoded id, or null if id is null. 197 * If id is not null, the return value is guaranteed to be a valid Doxia id. 198 * @see #isValidId(java.lang.String) 199 * @since 1.1.1 200 */ 201 public static String encodeId( final String id, final boolean chop ) 202 { 203 if ( id == null ) 204 { 205 return null; 206 } 207 208 final String idd = id.trim(); 209 int length = idd.length(); 210 211 if ( length == 0 ) 212 { 213 return "a"; 214 } 215 216 StringBuilder buffer = new StringBuilder( length ); 217 218 for ( int i = 0; i < length; ++i ) 219 { 220 char c = idd.charAt( i ); 221 222 if ( ( i == 0 ) && ( !isAsciiLetter( c ) ) ) 223 { 224 buffer.append( 'a' ); 225 } 226 227 if ( c == ' ' ) 228 { 229 buffer.append( '_' ); 230 } 231 else if ( isAsciiLetter( c ) || isAsciiDigit( c ) || ( c == '-' ) || ( c == '_' ) || ( c == ':' ) 232 || ( c == '.' ) ) 233 { 234 buffer.append( c ); 235 } 236 else if ( !chop ) 237 { 238 239 byte[] bytes = String.valueOf( c ).getBytes( StandardCharsets.UTF_8 ); 240 241 for ( byte aByte : bytes ) 242 { 243 buffer.append( '.' ); 244 buffer.append( String.format( "%02X", aByte ) ); 245 } 246 } 247 } 248 249 return buffer.toString(); 250 } 251 252 /** 253 * Determines if the specified text is a valid id according to the rules 254 * laid out in {@link #encodeId(String)}. 255 * 256 * @param text The text to be tested. 257 * May be null in which case false is returned. 258 * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>. 259 * @see #encodeId(String) 260 */ 261 public static boolean isValidId( final String text ) 262 { 263 if ( text == null || text.length() == 0 ) 264 { 265 return false; 266 } 267 268 for ( int i = 0; i < text.length(); ++i ) 269 { 270 char c = text.charAt( i ); 271 272 if ( isAsciiLetter( c ) ) 273 { 274 continue; 275 } 276 277 if ( ( i == 0 ) || ( c == ' ' ) || ( !isAsciiDigit( c ) && c != '-' && c != '_' && c != ':' && c != '.' ) ) 278 { 279 return false; 280 } 281 } 282 283 return true; 284 } 285 286 private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat( "", Locale.ENGLISH ); 287 private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition( 0 ); 288 private static final String[] DATE_PATTERNS = new String[] 289 { 290 "yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy", 291 "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy", 292 "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy" 293 }; 294 295 /** 296 * <p>Parses a string representing a date by trying different date patterns.</p> 297 * 298 * <p>The following date patterns are tried (in the given order):</p> 299 * 300 * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy", 301 * "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy", 302 * "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre> 303 * 304 * <p>A parse is only sucessful if it parses the whole of the input string. 305 * If no parse patterns match, a ParseException is thrown.</p> 306 * 307 * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code> 308 * (ignoring case) return the current date.</p> 309 * 310 * @param str the date to parse, not null. 311 * @return the parsed date, or the current date if the input String (ignoring case) was 312 * <code>"today"</code> or <code>"now"</code>. 313 * 314 * @throws ParseException if no pattern matches. 315 * @throws NullPointerException if str is null. 316 * @since 1.1.1. 317 */ 318 public static Date parseDate( final String str ) 319 throws ParseException 320 { 321 if ( "today".equalsIgnoreCase( str ) || "now".equalsIgnoreCase( str ) ) 322 { 323 return new Date(); 324 } 325 326 for ( String datePattern : DATE_PATTERNS ) 327 { 328 DATE_PARSER.applyPattern( datePattern ); 329 DATE_PARSE_POSITION.setIndex( 0 ); 330 final Date date = DATE_PARSER.parse( str, DATE_PARSE_POSITION ); 331 332 if ( date != null && DATE_PARSE_POSITION.getIndex() == str.length() ) 333 { 334 return date; 335 } 336 } 337 338 throw new ParseException( "Unable to parse date: " + str, -1 ); 339 } 340 341 // 342 // private 343 // 344 345 private static boolean isAsciiLetter( final char c ) 346 { 347 return ( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) ); 348 } 349 350 private static boolean isAsciiDigit( final char c ) 351 { 352 return ( c >= '0' && c <= '9' ); 353 } 354 355 /** 356 * Determine width and height of an image. If successful, the returned SinkEventAttributes 357 * contain width and height attribute keys whose values are the width and height of the image (as a String). 358 * 359 * @param logo a String containing either a URL or a path to an image file. Not null. 360 * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image. 361 * 362 * @throws java.io.IOException if an error occurs during reading. 363 * @throws NullPointerException if logo is null. 364 * 365 * @since 1.1.1 366 */ 367 public static MutableAttributeSet getImageAttributes( final String logo ) 368 throws IOException 369 { 370 BufferedImage img; 371 372 if ( isExternalLink( logo ) ) 373 { 374 img = ImageIO.read( new URL( logo ) ); 375 } 376 else 377 { 378 img = ImageIO.read( new File( logo ) ); 379 } 380 381 if ( img == null ) 382 { 383 return null; 384 } 385 386 MutableAttributeSet atts = new SinkEventAttributeSet(); 387 atts.addAttribute( SinkEventAttributeSet.WIDTH, Integer.toString( img.getWidth() ) ); 388 atts.addAttribute( SinkEventAttributeSet.HEIGHT, Integer.toString( img.getHeight() ) ); 389 // add other attributes? 390 391 return atts; 392 } 393 394 private DoxiaUtils() 395 { 396 // utility class 397 } 398 }