001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.maven.doxia.util; 020 021import javax.imageio.ImageIO; 022import javax.swing.text.MutableAttributeSet; 023 024import java.awt.image.BufferedImage; 025import java.io.File; 026import java.io.IOException; 027import java.net.URL; 028import java.nio.charset.StandardCharsets; 029import java.text.ParseException; 030import java.text.ParsePosition; 031import java.text.SimpleDateFormat; 032import java.util.Date; 033import java.util.Locale; 034 035import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; 036 037/** 038 * General Doxia utility methods. The methods in this class should not assume 039 * any specific Doxia module or document format. 040 * 041 * @author ltheussl 042 * @since 1.1 043 */ 044public class DoxiaUtils { 045 /** 046 * Checks if the given string corresponds to an internal link, 047 * ie it is a link to an anchor within the same document. 048 * If link is not null, then exactly one of the three methods 049 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 050 * {@link #isLocalLink(java.lang.String)} will return true. 051 * 052 * @param link The link to check. Not null. 053 * @return True if the link starts with "#". 054 * 055 * @throws NullPointerException if link is null. 056 * @see #isExternalLink(String) 057 * @see #isLocalLink(String) 058 */ 059 public static boolean isInternalLink(final String link) { 060 return link.startsWith("#"); 061 } 062 063 /** 064 * Checks if the given string corresponds to an external URI, 065 * ie is not a link within the same document nor a relative link 066 * to another document (a local link) of the same site. 067 * If link is not null, then exactly one of the three methods 068 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 069 * {@link #isLocalLink(java.lang.String)} will return true. 070 * 071 * @param link The link to check. Not null. 072 * @return True if the link (ignoring case) starts with either "http:/", 073 * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://". 074 * Note that Windows style separators "\" are not allowed 075 * for URIs, see http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3. 076 * 077 * @throws NullPointerException if link is null. 078 * 079 * @see #isInternalLink(String) 080 * @see #isLocalLink(String) 081 */ 082 public static boolean isExternalLink(final String link) { 083 String text = link.toLowerCase(Locale.ENGLISH); 084 085 return (text.startsWith("http:/") 086 || text.startsWith("https:/") 087 || text.startsWith("ftp:/") 088 || text.startsWith("mailto:") 089 || text.startsWith("file:/") 090 || text.contains("://")); 091 } 092 093 /** 094 * Checks if the given string corresponds to a relative link to another document 095 * within the same site, ie it is neither an {@link #isInternalLink(String) internal} 096 * nor an {@link #isExternalLink(String) external} link. 097 * If link is not null, then exactly one of the three methods 098 * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and 099 * {@link #isLocalLink(java.lang.String)} will return true. 100 * 101 * @param link The link to check. Not null. 102 * @return True if the link is neither an external nor an internal link. 103 * 104 * @throws NullPointerException if link is null. 105 * 106 * @see #isExternalLink(String) 107 * @see #isInternalLink(String) 108 */ 109 public static boolean isLocalLink(final String link) { 110 return (!isExternalLink(link) && !isInternalLink(link)); 111 } 112 113 /** 114 * Construct a valid Doxia id. 115 * 116 * <p> 117 * A valid Doxia id corresponds to an XML id which is a {code NCName} which is in turn identical 118 * to a <a href="https://www.w3.org/TR/REC-xml/#NT-Name">{@code Name}</a>, but without a colon 119 * and without any character above {@code 0x7F}. 120 * </p> 121 * <p> 122 * To achieve this we need to convert the <i>id</i> String. Two conversions 123 * are necessary and one is done to get prettier ids: 124 * </p> 125 * <ol> 126 * <li>Trim with {@link String#trim()} before starting to process,</li> 127 * <li>if the first character is not a {@code NameStartChar} prepend the letter 'a',</li> 128 * <li>any space character ({@code 0x20}) is replaced with an underscore,</li> 129 * <li> 130 * any character not matching the above pattern is either dropped, 131 * or replaced with its UTF-8 encoding where each byte is prepended with a dot. 132 * </li> 133 * </ol> 134 * 135 * <p> 136 * Here are some examples: 137 * </p> 138 * <pre> 139 * DoxiaUtils.encodeId(null) = null 140 * DoxiaUtils.encodeId("") = null 141 * DoxiaUtils.encodeId(" ") = null 142 * DoxiaUtils.encodeId(" _ ") = "_" 143 * DoxiaUtils.encodeId("1") = "a1" 144 * DoxiaUtils.encodeId("1anchor") = "a1anchor" 145 * DoxiaUtils.encodeId("_anchor") = "_anchor" 146 * DoxiaUtils.encodeId("a b-c123 ") = "a_b-c123" 147 * DoxiaUtils.encodeId(" anchor") = "anchor" 148 * DoxiaUtils.encodeId("myAnchor") = "myAnchor" 149 * DoxiaUtils.encodeId("€") = "a.E2.82.AC" 150 * </pre> 151 * 152 * @param text The text to be encoded. 153 * May be null, empty or blank in which case null is returned. 154 * @return The trimmed and encoded id, or null if id is null. 155 * If id is not null, the return value is guaranteed to be a valid Doxia id. 156 * @see #isValidId(java.lang.String) 157 * @since 1.1.1 158 */ 159 public static String encodeId(final String text) { 160 if (text == null) { 161 return null; 162 } 163 164 final String textt = text.trim(); 165 int length = textt.length(); 166 167 if (length == 0) { 168 return null; 169 } 170 171 StringBuilder buffer = new StringBuilder(length); 172 173 for (int i = 0; i < length; ++i) { 174 char c = textt.charAt(i); 175 176 if ((i == 0) && !(isAsciiLetter(c) || c == '_')) { 177 buffer.append('a'); 178 } 179 180 if (c == ' ') { 181 buffer.append('_'); 182 } else if (isAsciiLetter(c) || isAsciiDigit(c) || (c == '-') || (c == '_') || (c == '.')) { 183 buffer.append(c); 184 } else { 185 186 byte[] bytes = String.valueOf(c).getBytes(StandardCharsets.UTF_8); 187 188 for (byte aByte : bytes) { 189 buffer.append('.'); 190 buffer.append(String.format("%02X", aByte)); 191 } 192 } 193 } 194 195 return buffer.toString(); 196 } 197 198 /** 199 * Determines if the specified text is a valid id according to the rules 200 * laid out in {@link #encodeId(String)}. 201 * 202 * @param text The id to be tested. 203 * May be null or empty in which case false is returned. 204 * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>. 205 * @see #encodeId(String) 206 */ 207 public static boolean isValidId(final String text) { 208 if (text == null || text.length() == 0) { 209 return false; 210 } 211 212 for (int i = 0; i < text.length(); ++i) { 213 char c = text.charAt(i); 214 215 if (isAsciiLetter(c) || c == '_') { 216 continue; 217 } 218 219 if ((i == 0) || (!isAsciiDigit(c) && c != '-' && c != '.')) { 220 return false; 221 } 222 } 223 224 return true; 225 } 226 227 private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat("", Locale.ENGLISH); 228 private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition(0); 229 private static final String[] DATE_PATTERNS = new String[] { 230 "yyyy-MM-dd", 231 "yyyy/MM/dd", 232 "yyyyMMdd", 233 "yyyy", 234 "dd.MM.yyyy", 235 "dd MMM yyyy", 236 "dd MMM. yyyy", 237 "MMMM yyyy", 238 "MMM. dd, yyyy", 239 "MMM. yyyy", 240 "MMMM dd, yyyy", 241 "MMM d, ''yy", 242 "MMM. ''yy", 243 "MMMM ''yy" 244 }; 245 246 /** 247 * <p>Parses a string representing a date by trying different date patterns.</p> 248 * 249 * <p>The following date patterns are tried (in the given order):</p> 250 * 251 * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy", 252 * "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy", 253 * "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre> 254 * 255 * <p>A parse is only sucessful if it parses the whole of the input string. 256 * If no parse patterns match, a ParseException is thrown.</p> 257 * 258 * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code> 259 * (ignoring case) return the current date.</p> 260 * 261 * @param str the date to parse, not null. 262 * @return the parsed date, or the current date if the input String (ignoring case) was 263 * <code>"today"</code> or <code>"now"</code>. 264 * 265 * @throws ParseException if no pattern matches. 266 * @throws NullPointerException if str is null. 267 * @since 1.1.1. 268 */ 269 public static Date parseDate(final String str) throws ParseException { 270 if ("today".equalsIgnoreCase(str) || "now".equalsIgnoreCase(str)) { 271 return new Date(); 272 } 273 274 for (String datePattern : DATE_PATTERNS) { 275 DATE_PARSER.applyPattern(datePattern); 276 DATE_PARSE_POSITION.setIndex(0); 277 final Date date = DATE_PARSER.parse(str, DATE_PARSE_POSITION); 278 279 if (date != null && DATE_PARSE_POSITION.getIndex() == str.length()) { 280 return date; 281 } 282 } 283 284 throw new ParseException("Unable to parse date: " + str, -1); 285 } 286 287 // 288 // private 289 // 290 291 private static boolean isAsciiLetter(final char c) { 292 return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); 293 } 294 295 private static boolean isAsciiDigit(final char c) { 296 return (c >= '0' && c <= '9'); 297 } 298 299 /** 300 * Determine width and height of an image. If successful, the returned SinkEventAttributes 301 * contain width and height attribute keys whose values are the width and height of the image (as a String). 302 * 303 * @param logo a String containing either a URL or a path to an image file. Not null. 304 * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image. 305 * 306 * @throws java.io.IOException if an error occurs during reading. 307 * @throws NullPointerException if logo is null. 308 * 309 * @since 1.1.1 310 */ 311 public static MutableAttributeSet getImageAttributes(final String logo) throws IOException { 312 BufferedImage img; 313 314 if (isExternalLink(logo)) { 315 img = ImageIO.read(new URL(logo)); 316 } else { 317 img = ImageIO.read(new File(logo)); 318 } 319 320 if (img == null) { 321 return null; 322 } 323 324 MutableAttributeSet atts = new SinkEventAttributeSet(); 325 atts.addAttribute(SinkEventAttributeSet.WIDTH, Integer.toString(img.getWidth())); 326 atts.addAttribute(SinkEventAttributeSet.HEIGHT, Integer.toString(img.getHeight())); 327 // add other attributes? 328 329 return atts; 330 } 331 332 private DoxiaUtils() { 333 // utility class 334 } 335}