1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.maven.doxia.util; 20 21 import javax.imageio.ImageIO; 22 import javax.swing.text.MutableAttributeSet; 23 24 import java.awt.image.BufferedImage; 25 import java.io.File; 26 import java.io.IOException; 27 import java.net.URL; 28 import java.nio.charset.StandardCharsets; 29 import java.text.ParseException; 30 import java.text.ParsePosition; 31 import java.text.SimpleDateFormat; 32 import java.util.Date; 33 import java.util.Locale; 34 35 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; 36 37 /** 38 * General Doxia utility methods. The methods in this class should not assume 39 * any specific Doxia module or document format. 40 * 41 * @author ltheussl 42 * @since 1.1 43 */ 44 public class DoxiaUtils { 45 /** 46 * Checks if the given string corresponds to an internal link, 47 * ie it is a link to an anchor within the same document. 48 * If link is not null, then exactly one of the three methods 49 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 50 * {@link #isLocalLink(java.lang.String)} will return true. 51 * 52 * @param link The link to check. Not null. 53 * @return True if the link starts with "#". 54 * 55 * @throws NullPointerException if link is null. 56 * @see #isExternalLink(String) 57 * @see #isLocalLink(String) 58 */ 59 public static boolean isInternalLink(final String link) { 60 return link.startsWith("#"); 61 } 62 63 /** 64 * Checks if the given string corresponds to an external URI, 65 * ie is not a link within the same document nor a relative link 66 * to another document (a local link) of the same site. 67 * If link is not null, then exactly one of the three methods 68 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 69 * {@link #isLocalLink(java.lang.String)} will return true. 70 * 71 * @param link The link to check. Not null. 72 * @return True if the link (ignoring case) starts with either "http:/", 73 * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://". 74 * Note that Windows style separators "\" are not allowed 75 * for URIs, see http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3. 76 * 77 * @throws NullPointerException if link is null. 78 * 79 * @see #isInternalLink(String) 80 * @see #isLocalLink(String) 81 */ 82 public static boolean isExternalLink(final String link) { 83 String text = link.toLowerCase(Locale.ENGLISH); 84 85 return (text.startsWith("http:/") 86 || text.startsWith("https:/") 87 || text.startsWith("ftp:/") 88 || text.startsWith("mailto:") 89 || text.startsWith("file:/") 90 || text.contains("://")); 91 } 92 93 /** 94 * Checks if the given string corresponds to a relative link to another document 95 * within the same site, ie it is neither an {@link #isInternalLink(String) internal} 96 * nor an {@link #isExternalLink(String) external} link. 97 * If link is not null, then exactly one of the three methods 98 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 99 * {@link #isLocalLink(java.lang.String)} will return true. 100 * 101 * @param link The link to check. Not null. 102 * @return True if the link is neither an external nor an internal link. 103 * 104 * @throws NullPointerException if link is null. 105 * 106 * @see #isExternalLink(String) 107 * @see #isInternalLink(String) 108 */ 109 public static boolean isLocalLink(final String link) { 110 return (!isExternalLink(link) && !isInternalLink(link)); 111 } 112 113 /** 114 * Construct a valid Doxia id. 115 * 116 * <p> 117 * A valid Doxia id corresponds to an XML id which is a {code NCName} which is in turn identical 118 * to a <a href="https://www.w3.org/TR/REC-xml/#NT-Name">{@code Name}</a>, but without a colon 119 * and without any character above {@code 0x7F}. 120 * </p> 121 * <p> 122 * To achieve this we need to convert the <i>id</i> String. Two conversions 123 * are necessary and one is done to get prettier ids: 124 * </p> 125 * <ol> 126 * <li>Trim with {@link String#trim()} before starting to process,</li> 127 * <li>if the first character is not a {@code NameStartChar} prepend the letter 'a',</li> 128 * <li>any space character ({@code 0x20}) is replaced with an underscore,</li> 129 * <li> 130 * any character not matching the above pattern is either dropped, 131 * or replaced with its UTF-8 encoding where each byte is prepended with a dot. 132 * </li> 133 * </ol> 134 * 135 * <p> 136 * Here are some examples: 137 * </p> 138 * <pre> 139 * DoxiaUtils.encodeId(null) = null 140 * DoxiaUtils.encodeId("") = null 141 * DoxiaUtils.encodeId(" ") = null 142 * DoxiaUtils.encodeId(" _ ") = "_" 143 * DoxiaUtils.encodeId("1") = "a1" 144 * DoxiaUtils.encodeId("1anchor") = "a1anchor" 145 * DoxiaUtils.encodeId("_anchor") = "_anchor" 146 * DoxiaUtils.encodeId("a b-c123 ") = "a_b-c123" 147 * DoxiaUtils.encodeId(" anchor") = "anchor" 148 * DoxiaUtils.encodeId("myAnchor") = "myAnchor" 149 * DoxiaUtils.encodeId("€") = "a.E2.82.AC" 150 * </pre> 151 * 152 * @param text The text to be encoded. 153 * May be null, empty or blank in which case null is returned. 154 * @return The trimmed and encoded id, or null if id is null. 155 * If id is not null, the return value is guaranteed to be a valid Doxia id. 156 * @see #isValidId(java.lang.String) 157 * @since 1.1.1 158 */ 159 public static String encodeId(final String text) { 160 if (text == null) { 161 return null; 162 } 163 164 final String textt = text.trim(); 165 int length = textt.length(); 166 167 if (length == 0) { 168 return null; 169 } 170 171 StringBuilder buffer = new StringBuilder(length); 172 173 for (int i = 0; i < length; ++i) { 174 char c = textt.charAt(i); 175 176 if ((i == 0) && !(isAsciiLetter(c) || c == '_')) { 177 buffer.append('a'); 178 } 179 180 if (c == ' ') { 181 buffer.append('_'); 182 } else if (isAsciiLetter(c) || isAsciiDigit(c) || (c == '-') || (c == '_') || (c == '.')) { 183 buffer.append(c); 184 } else { 185 186 byte[] bytes = String.valueOf(c).getBytes(StandardCharsets.UTF_8); 187 188 for (byte aByte : bytes) { 189 buffer.append('.'); 190 buffer.append(String.format("%02X", aByte)); 191 } 192 } 193 } 194 195 return buffer.toString(); 196 } 197 198 /** 199 * Determines if the specified text is a valid id according to the rules 200 * laid out in {@link #encodeId(String)}. 201 * 202 * @param text The id to be tested. 203 * May be null or empty in which case false is returned. 204 * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>. 205 * @see #encodeId(String) 206 */ 207 public static boolean isValidId(final String text) { 208 if (text == null || text.length() == 0) { 209 return false; 210 } 211 212 for (int i = 0; i < text.length(); ++i) { 213 char c = text.charAt(i); 214 215 if (isAsciiLetter(c) || c == '_') { 216 continue; 217 } 218 219 if ((i == 0) || (!isAsciiDigit(c) && c != '-' && c != '.')) { 220 return false; 221 } 222 } 223 224 return true; 225 } 226 227 private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat("", Locale.ENGLISH); 228 private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition(0); 229 private static final String[] DATE_PATTERNS = new String[] { 230 "yyyy-MM-dd", 231 "yyyy/MM/dd", 232 "yyyyMMdd", 233 "yyyy", 234 "dd.MM.yyyy", 235 "dd MMM yyyy", 236 "dd MMM. yyyy", 237 "MMMM yyyy", 238 "MMM. dd, yyyy", 239 "MMM. yyyy", 240 "MMMM dd, yyyy", 241 "MMM d, ''yy", 242 "MMM. ''yy", 243 "MMMM ''yy" 244 }; 245 246 /** 247 * <p>Parses a string representing a date by trying different date patterns.</p> 248 * 249 * <p>The following date patterns are tried (in the given order):</p> 250 * 251 * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy", 252 * "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy", 253 * "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre> 254 * 255 * <p>A parse is only sucessful if it parses the whole of the input string. 256 * If no parse patterns match, a ParseException is thrown.</p> 257 * 258 * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code> 259 * (ignoring case) return the current date.</p> 260 * 261 * @param str the date to parse, not null. 262 * @return the parsed date, or the current date if the input String (ignoring case) was 263 * <code>"today"</code> or <code>"now"</code>. 264 * 265 * @throws ParseException if no pattern matches. 266 * @throws NullPointerException if str is null. 267 * @since 1.1.1. 268 */ 269 public static Date parseDate(final String str) throws ParseException { 270 if ("today".equalsIgnoreCase(str) || "now".equalsIgnoreCase(str)) { 271 return new Date(); 272 } 273 274 for (String datePattern : DATE_PATTERNS) { 275 DATE_PARSER.applyPattern(datePattern); 276 DATE_PARSE_POSITION.setIndex(0); 277 final Date date = DATE_PARSER.parse(str, DATE_PARSE_POSITION); 278 279 if (date != null && DATE_PARSE_POSITION.getIndex() == str.length()) { 280 return date; 281 } 282 } 283 284 throw new ParseException("Unable to parse date: " + str, -1); 285 } 286 287 // 288 // private 289 // 290 291 private static boolean isAsciiLetter(final char c) { 292 return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); 293 } 294 295 private static boolean isAsciiDigit(final char c) { 296 return (c >= '0' && c <= '9'); 297 } 298 299 /** 300 * Determine width and height of an image. If successful, the returned SinkEventAttributes 301 * contain width and height attribute keys whose values are the width and height of the image (as a String). 302 * 303 * @param logo a String containing either a URL or a path to an image file. Not null. 304 * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image. 305 * 306 * @throws java.io.IOException if an error occurs during reading. 307 * @throws NullPointerException if logo is null. 308 * 309 * @since 1.1.1 310 */ 311 public static MutableAttributeSet getImageAttributes(final String logo) throws IOException { 312 BufferedImage img; 313 314 if (isExternalLink(logo)) { 315 img = ImageIO.read(new URL(logo)); 316 } else { 317 img = ImageIO.read(new File(logo)); 318 } 319 320 if (img == null) { 321 return null; 322 } 323 324 MutableAttributeSet atts = new SinkEventAttributeSet(); 325 atts.addAttribute(SinkEventAttributeSet.WIDTH, Integer.toString(img.getWidth())); 326 atts.addAttribute(SinkEventAttributeSet.HEIGHT, Integer.toString(img.getHeight())); 327 // add other attributes? 328 329 return atts; 330 } 331 332 private DoxiaUtils() { 333 // utility class 334 } 335 }