001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.maven.doxia.util;
020
021import javax.imageio.ImageIO;
022import javax.swing.text.MutableAttributeSet;
023
024import java.awt.image.BufferedImage;
025import java.io.File;
026import java.io.IOException;
027import java.net.URL;
028import java.nio.charset.StandardCharsets;
029import java.text.ParseException;
030import java.text.ParsePosition;
031import java.text.SimpleDateFormat;
032import java.util.Date;
033import java.util.Locale;
034
035import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
036
037/**
038 * General Doxia utility methods. The methods in this class should not assume
039 * any specific Doxia module or document format.
040 *
041 * @author ltheussl
042 * @since 1.1
043 */
044public class DoxiaUtils {
045    /**
046     * Checks if the given string corresponds to an internal link,
047     * ie it is a link to an anchor within the same document.
048     * If link is not null, then exactly one of the three methods
049     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
050     * {@link #isLocalLink(java.lang.String)} will return true.
051     *
052     * @param link The link to check. Not null.
053     * @return True if the link starts with "#".
054     *
055     * @throws NullPointerException if link is null.
056     * @see #isExternalLink(String)
057     * @see #isLocalLink(String)
058     */
059    public static boolean isInternalLink(final String link) {
060        return link.startsWith("#");
061    }
062
063    /**
064     * Checks if the given string corresponds to an external URI,
065     * ie is not a link within the same document nor a relative link
066     * to another document (a local link) of the same site.
067     * If link is not null, then exactly one of the three methods
068     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
069     * {@link #isLocalLink(java.lang.String)} will return true.
070     *
071     * @param link The link to check. Not null.
072     * @return True if the link (ignoring case) starts with either "http:/",
073     * "https:/", "ftp:/", "mailto:", "file:/", or contains the string "://".
074     * Note that Windows style separators "\" are not allowed
075     * for URIs, see  http://www.ietf.org/rfc/rfc2396.txt , section 2.4.3.
076     *
077     * @throws NullPointerException if link is null.
078     *
079     * @see #isInternalLink(String)
080     * @see #isLocalLink(String)
081     */
082    public static boolean isExternalLink(final String link) {
083        String text = link.toLowerCase(Locale.ENGLISH);
084
085        return (text.startsWith("http:/")
086                || text.startsWith("https:/")
087                || text.startsWith("ftp:/")
088                || text.startsWith("mailto:")
089                || text.startsWith("file:/")
090                || text.contains("://"));
091    }
092
093    /**
094     * Checks if the given string corresponds to a relative link to another document
095     * within the same site, ie it is neither an {@link #isInternalLink(String) internal}
096     * nor an {@link #isExternalLink(String) external} link.
097     * If link is not null, then exactly one of the three methods
098     * {@link #isInternalLink(java.lang.String)}, {@link #isExternalLink(java.lang.String)} and
099     * {@link #isLocalLink(java.lang.String)} will return true.
100     *
101     * @param link The link to check. Not null.
102     * @return True if the link is neither an external nor an internal link.
103     *
104     * @throws NullPointerException if link is null.
105     *
106     * @see #isExternalLink(String)
107     * @see #isInternalLink(String)
108     */
109    public static boolean isLocalLink(final String link) {
110        return (!isExternalLink(link) && !isInternalLink(link));
111    }
112
113    /**
114     * Construct a valid Doxia id.
115     *
116     * <p>
117     *   A valid Doxia id corresponds to an XML id which is a {code NCName} which is in turn identical
118     *   to a <a href="https://www.w3.org/TR/REC-xml/#NT-Name">{@code Name}</a>, but without a colon
119     *   and without any character above {@code 0x7F}.
120     * </p>
121     * <p>
122     *   To achieve this we need to convert the <i>id</i> String. Two conversions
123     *   are necessary and one is done to get prettier ids:
124     * </p>
125     * <ol>
126     *   <li>Trim with {@link String#trim()} before starting to process,</li>
127     *   <li>if the first character is not a {@code NameStartChar} prepend the letter 'a',</li>
128     *   <li>any space character ({@code 0x20}) is replaced with an underscore,</li>
129     *   <li>
130     *     any character not matching the above pattern is either dropped,
131     *     or replaced with its UTF-8 encoding where each byte is prepended with a dot.
132     *   </li>
133     * </ol>
134     *
135     * <p>
136     * Here are some examples:
137     * </p>
138     * <pre>
139     * DoxiaUtils.encodeId(null)        = null
140     * DoxiaUtils.encodeId("")          = null
141     * DoxiaUtils.encodeId("  ")        = null
142     * DoxiaUtils.encodeId(" _ ")       = "_"
143     * DoxiaUtils.encodeId("1")         = "a1"
144     * DoxiaUtils.encodeId("1anchor")   = "a1anchor"
145     * DoxiaUtils.encodeId("_anchor")   = "_anchor"
146     * DoxiaUtils.encodeId("a b-c123 ") = "a_b-c123"
147     * DoxiaUtils.encodeId("   anchor") = "anchor"
148     * DoxiaUtils.encodeId("myAnchor")  = "myAnchor"
149     * DoxiaUtils.encodeId("€")         = "a.E2.82.AC"
150     * </pre>
151     *
152     * @param text The text to be encoded.
153     *      May be null, empty or blank in which case null is returned.
154     * @return The trimmed and encoded id, or null if id is null.
155     * If id is not null, the return value is guaranteed to be a valid Doxia id.
156     * @see #isValidId(java.lang.String)
157     * @since 1.1.1
158     */
159    public static String encodeId(final String text) {
160        if (text == null) {
161            return null;
162        }
163
164        final String textt = text.trim();
165        int length = textt.length();
166
167        if (length == 0) {
168            return null;
169        }
170
171        StringBuilder buffer = new StringBuilder(length);
172
173        for (int i = 0; i < length; ++i) {
174            char c = textt.charAt(i);
175
176            if ((i == 0) && !(isAsciiLetter(c) || c == '_')) {
177                buffer.append('a');
178            }
179
180            if (c == ' ') {
181                buffer.append('_');
182            } else if (isAsciiLetter(c) || isAsciiDigit(c) || (c == '-') || (c == '_') || (c == '.')) {
183                buffer.append(c);
184            } else {
185
186                byte[] bytes = String.valueOf(c).getBytes(StandardCharsets.UTF_8);
187
188                for (byte aByte : bytes) {
189                    buffer.append('.');
190                    buffer.append(String.format("%02X", aByte));
191                }
192            }
193        }
194
195        return buffer.toString();
196    }
197
198    /**
199     * Determines if the specified text is a valid id according to the rules
200     * laid out in {@link #encodeId(String)}.
201     *
202     * @param text The id to be tested.
203     *      May be null or empty in which case false is returned.
204     * @return <code>true</code> if the text is a valid id, otherwise <code>false</code>.
205     * @see #encodeId(String)
206     */
207    public static boolean isValidId(final String text) {
208        if (text == null || text.length() == 0) {
209            return false;
210        }
211
212        for (int i = 0; i < text.length(); ++i) {
213            char c = text.charAt(i);
214
215            if (isAsciiLetter(c) || c == '_') {
216                continue;
217            }
218
219            if ((i == 0) || (!isAsciiDigit(c) && c != '-' && c != '.')) {
220                return false;
221            }
222        }
223
224        return true;
225    }
226
227    private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat("", Locale.ENGLISH);
228    private static final ParsePosition DATE_PARSE_POSITION = new ParsePosition(0);
229    private static final String[] DATE_PATTERNS = new String[] {
230        "yyyy-MM-dd",
231        "yyyy/MM/dd",
232        "yyyyMMdd",
233        "yyyy",
234        "dd.MM.yyyy",
235        "dd MMM yyyy",
236        "dd MMM. yyyy",
237        "MMMM yyyy",
238        "MMM. dd, yyyy",
239        "MMM. yyyy",
240        "MMMM dd, yyyy",
241        "MMM d, ''yy",
242        "MMM. ''yy",
243        "MMMM ''yy"
244    };
245
246    /**
247     * <p>Parses a string representing a date by trying different date patterns.</p>
248     *
249     * <p>The following date patterns are tried (in the given order):</p>
250     *
251     * <pre>"yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd", "yyyy", "dd.MM.yyyy", "dd MMM yyyy",
252     *  "dd MMM. yyyy", "MMMM yyyy", "MMM. dd, yyyy", "MMM. yyyy", "MMMM dd, yyyy",
253     *  "MMM d, ''yy", "MMM. ''yy", "MMMM ''yy"</pre>
254     *
255     * <p>A parse is only sucessful if it parses the whole of the input string.
256     * If no parse patterns match, a ParseException is thrown.</p>
257     *
258     * <p>As a special case, the strings <code>"today"</code> and <code>"now"</code>
259     * (ignoring case) return the current date.</p>
260     *
261     * @param str the date to parse, not null.
262     * @return the parsed date, or the current date if the input String (ignoring case) was
263     *      <code>"today"</code> or <code>"now"</code>.
264     *
265     * @throws ParseException if no pattern matches.
266     * @throws NullPointerException if str is null.
267     * @since 1.1.1.
268     */
269    public static Date parseDate(final String str) throws ParseException {
270        if ("today".equalsIgnoreCase(str) || "now".equalsIgnoreCase(str)) {
271            return new Date();
272        }
273
274        for (String datePattern : DATE_PATTERNS) {
275            DATE_PARSER.applyPattern(datePattern);
276            DATE_PARSE_POSITION.setIndex(0);
277            final Date date = DATE_PARSER.parse(str, DATE_PARSE_POSITION);
278
279            if (date != null && DATE_PARSE_POSITION.getIndex() == str.length()) {
280                return date;
281            }
282        }
283
284        throw new ParseException("Unable to parse date: " + str, -1);
285    }
286
287    //
288    // private
289    //
290
291    private static boolean isAsciiLetter(final char c) {
292        return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));
293    }
294
295    private static boolean isAsciiDigit(final char c) {
296        return (c >= '0' && c <= '9');
297    }
298
299    /**
300     * Determine width and height of an image. If successful, the returned SinkEventAttributes
301     * contain width and height attribute keys whose values are the width and height of the image (as a String).
302     *
303     * @param logo a String containing either a URL or a path to an image file. Not null.
304     * @return a set of SinkEventAttributes, or null if no ImageReader was found to read the image.
305     *
306     * @throws java.io.IOException if an error occurs during reading.
307     * @throws NullPointerException if logo is null.
308     *
309     * @since 1.1.1
310     */
311    public static MutableAttributeSet getImageAttributes(final String logo) throws IOException {
312        BufferedImage img;
313
314        if (isExternalLink(logo)) {
315            img = ImageIO.read(new URL(logo));
316        } else {
317            img = ImageIO.read(new File(logo));
318        }
319
320        if (img == null) {
321            return null;
322        }
323
324        MutableAttributeSet atts = new SinkEventAttributeSet();
325        atts.addAttribute(SinkEventAttributeSet.WIDTH, Integer.toString(img.getWidth()));
326        atts.addAttribute(SinkEventAttributeSet.HEIGHT, Integer.toString(img.getHeight()));
327        // add other attributes?
328
329        return atts;
330    }
331
332    private DoxiaUtils() {
333        // utility class
334    }
335}