/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.maven.doxia.module.xhtml5;

import javax.inject.Named;
import javax.inject.Singleton;
import javax.swing.text.html.HTML.Attribute;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.macro.MacroRequest;
import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
import org.apache.maven.doxia.parser.ParseException;
import org.apache.maven.doxia.parser.Xhtml5BaseParser;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Parse an xhtml5 model and emit events into a Doxia Sink.
 *
 * <p>On top of the tags handled by {@link Xhtml5BaseParser}, this parser deals with the document-level
 * elements ({@code html}, {@code head}, {@code title}, {@code meta}, {@code body}, {@code address}),
 * with {@code <div class="verbatim source">} / {@code <pre>} source blocks, and with macros embedded
 * in XML comments of the form {@code MACRO{name|key=value|...}}.</p>
 */
@Singleton
@Named("xhtml")
public class Xhtml5Parser extends Xhtml5BaseParser implements Xhtml5Markup {
    private static final Logger LOGGER = LoggerFactory.getLogger(Xhtml5Parser.class);

    /** For verbatim source. */
    protected boolean source;

    /** Empty elements don't write a closing tag. */
    private boolean isEmptyElement;

    /**
     * The source content of the input reader. Used to pass into macros.
     */
    private String sourceContent;

    /** {@inheritDoc} */
    @Override
    protected void handleStartTag(XmlPullParser parser, Sink sink)
            throws XmlPullParserException, MacroExecutionException {
        // Remembered so that handleEndTag() can skip the closing event of an unknown empty element.
        isEmptyElement = parser.isEmptyElementTag();

        SinkEventAttributeSet attribs = getAttributesFromParser(parser);
        String tagName = parser.getName();

        if (tagName.equals(HTML.toString())) {
            // Do nothing
            return;
        } else if (tagName.equals(HEAD.toString())) {
            sink.head(attribs);
        } else if (tagName.equals(TITLE.toString())) {
            sink.title(attribs);
        } else if (tagName.equals(META.toString())) {
            String name = parser.getAttributeValue(null, Attribute.NAME.toString());
            String content = parser.getAttributeValue(null, Attribute.CONTENT.toString());

            if ("author".equals(name)) {
                sink.author(null);

                sink.text(content);

                sink.author_();
            } else if ("date".equals(name)) {
                sink.date(null);

                sink.text(content);

                sink.date_();
            } else {
                // Any other meta element is forwarded untouched as an unknown simple tag.
                sink.unknown("meta", new Object[] {TAG_TYPE_SIMPLE}, attribs);
            }
        }
        /*
         * The ADDRESS element may be used by authors to supply contact information
         * for a model or a major part of a model such as a form. This element
         * often appears at the beginning or end of a model.
         */
        else if (tagName.equals(ADDRESS.toString())) {
            sink.address(attribs);
        } else if (tagName.equals(BODY.toString())) {
            sink.body(attribs);
        } else if (tagName.equals(DIV.toString())) {
            String divClass = parser.getAttributeValue(null, Attribute.CLASS.toString());

            if ("verbatim source".equals(divClass)) {
                // A following <pre> is emitted as source verbatim until the next </div>.
                this.source = true;
            }

            baseStartTag(parser, sink); // pick up other divs
        }
        /*
         * The PRE element tells visual user agents that the enclosed text is
         * "preformatted". When handling preformatted text, visual user agents:
         * - May leave white space intact.
         * - May render text with a fixed-pitch font.
         * - May disable automatic word wrap.
         * - Must not disable bidirectional processing.
         * Non-visual user agents are not required to respect extra white space
         * in the content of a PRE element.
         */
        else if (tagName.equals(PRE.toString())) {
            if (source) {
                attribs.addAttributes(SinkEventAttributeSet.SOURCE);
            }

            verbatim();

            sink.verbatim(attribs);
        } else if (!baseStartTag(parser, sink)) {
            // Neither handled here nor by the base parser: pass it through as an unknown event.
            if (isEmptyElement) {
                handleUnknown(parser, sink, TAG_TYPE_SIMPLE);
            } else {
                handleUnknown(parser, sink, TAG_TYPE_START);
            }

            LOGGER.warn(
                    "Unrecognized xhtml5 tag <{}> at [{}:{}]",
                    tagName,
                    parser.getLineNumber(),
                    parser.getColumnNumber());
        }
    }

    /** {@inheritDoc} */
    @Override
    protected void handleEndTag(XmlPullParser parser, Sink sink)
            throws XmlPullParserException, MacroExecutionException {
        String tagName = parser.getName();

        if (tagName.equals(HTML.toString())) {
            // Do nothing
            return;
        } else if (tagName.equals(HEAD.toString())) {
            sink.head_();
        } else if (tagName.equals(TITLE.toString())) {
            sink.title_();
        } else if (tagName.equals(BODY.toString())) {
            emitHeadingSections(0, sink, false);

            sink.body_();
        } else if (tagName.equals(ADDRESS.toString())) {
            sink.address_();
        } else if (tagName.equals(DIV.toString())) {
            this.source = false;
            baseEndTag(parser, sink);
        } else if (!baseEndTag(parser, sink)) {
            // An unknown empty element was already emitted as TAG_TYPE_SIMPLE by handleStartTag().
            if (!isEmptyElement) {
                handleUnknown(parser, sink, TAG_TYPE_END);
            }
        }

        isEmptyElement = false;
    }

    /** {@inheritDoc} */
    @Override
    protected void handleComment(XmlPullParser parser, Sink sink) throws XmlPullParserException {
        String text = getText(parser).trim();

        if (text.startsWith("MACRO") && !isSecondParsing()) {
            processMacro(text, sink);
        } else {
            super.handleComment(parser, sink);
        }
    }

    /**
     * Process a macro embedded in an XHTML comment.
     *
     * <p>The expected form is {@code MACRO{name|key=value|key=value...}}. A literal {@code =} or
     * {@code |} inside a key or value has to be written as {@code \=} respectively {@code \|}.
     * Only the first two {@code =}-separated tokens of a parameter are used as key and value.</p>
     *
     * @param text the trimmed comment text, starting with {@code MACRO}
     * @param sink the sink receiving the events produced by the macro
     * @throws XmlPullParserException if the macro declaration is malformed, a parameter is not a
     *         {@code key=value} pair, the macro cannot be found or its execution fails
     */
    private void processMacro(String text, Sink sink) throws XmlPullParserException {
        int open = text.indexOf('{');
        int close = text.indexOf('}', open + 1);
        if (open < 0 || close < 0) {
            // Without this guard substring() would fail with an unchecked StringIndexOutOfBoundsException.
            throw new XmlPullParserException("Malformed macro declaration, expected 'MACRO{name|key=value}': " + text);
        }

        String s = escapeForMacro(text.substring(open + 1, close));
        String[] params = StringUtils.split(s, "|");
        if (params == null || params.length == 0) {
            throw new XmlPullParserException("Missing macro name in macro declaration: " + text);
        }
        String macroName = params[0];

        Map<String, Object> parameters = new HashMap<>();
        for (int i = 1; i < params.length; i++) {
            String[] param = StringUtils.split(params[i], "=");
            // split() drops empty tokens, so "=" yields no token at all and "key=" only one.
            if (param.length < 2) {
                throw new XmlPullParserException("Missing 'key=value' pair for macro parameter: " + params[i]);
            }

            String key = unescapeForMacro(param[0]);
            String value = unescapeForMacro(param[1]);
            parameters.put(key, value);
        }

        MacroRequest request = new MacroRequest(sourceContent, new Xhtml5Parser(), parameters, getBasedir());

        try {
            executeMacro(macroName, request, sink);
        } catch (MacroExecutionException e) {
            throw withCause(new XmlPullParserException("Unable to execute macro in the document: " + macroName), e);
        } catch (MacroNotFoundException me) {
            throw withCause(new XmlPullParserException("Macro not found: " + macroName), me);
        }
    }

    /**
     * Attaches the original failure to a freshly created exception so that the root cause stays
     * visible in the stack trace while the exception message is left untouched.
     *
     * @param failure the exception to be thrown, created without a cause
     * @param cause the exception that triggered the failure
     * @return {@code failure}, for use in a {@code throw} statement
     */
    private static XmlPullParserException withCause(XmlPullParserException failure, Throwable cause) {
        failure.initCause(cause);
        return failure;
    }

    /**
     * Replaces the escape sequences {@code \=} and {@code \|} by placeholder characters so that the
     * escaped characters are not taken for separators when the macro declaration is split.
     *
     * @param s the raw macro declaration, may be {@code null} or empty
     * @return the string with escape sequences replaced, or {@code s} itself if it is {@code null} or empty
     */
    private String escapeForMacro(String s) {
        if (s == null || s.isEmpty()) {
            return s;
        }

        String result = s;

        // use some outrageously out-of-place chars for text
        // (these are device control one/two in unicode)
        result = StringUtils.replace(result, "\\=", "\u0011");
        result = StringUtils.replace(result, "\\|", "\u0012");

        return result;
    }

    /**
     * Reverts {@link #escapeForMacro(String)}: turns the placeholder characters back into the
     * literal {@code =} and {@code |} characters.
     *
     * @param s a key or value taken from an escaped macro declaration, may be {@code null} or empty
     * @return the string with placeholders replaced, or {@code s} itself if it is {@code null} or empty
     */
    private String unescapeForMacro(String s) {
        if (s == null || s.isEmpty()) {
            return s;
        }

        String result = s;

        result = StringUtils.replace(result, "\u0011", "=");
        result = StringUtils.replace(result, "\u0012", "|");

        return result;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void init() {
        super.init();

        this.source = false;
        this.isEmptyElement = false;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The reader is read completely and closed before parsing starts; the buffered content is
     * kept for the duration of the parse so that it can be handed to macros.</p>
     */
    @Override
    public void parse(Reader source, Sink sink, String reference) throws ParseException {
        this.sourceContent = null;

        try (Reader reader = source) {
            StringWriter contentWriter = new StringWriter();
            IOUtils.copy(reader, contentWriter);
            sourceContent = contentWriter.toString();
        } catch (IOException ex) {
            throw new ParseException("Error reading the input source", ex);
        }

        try {
            super.parse(new StringReader(sourceContent), sink, reference);
        } finally {
            // Do not hold on to the document once parsing is over.
            this.sourceContent = null;
        }
    }
}