1 package org.apache.maven.doxia.module.twiki.parser;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.util.ArrayList;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26
27 /**
28 * Parse looking for formated text (bold, italic, ...)
29 *
30 * @author Juan F. Codagnone
31 * @version $Id: FormatedTextParser.java 1090706 2011-04-09 23:15:28Z hboutemy $
32 */
33 public class FormatedTextParser
34 {
35 /**
36 * parser used to parse text...
37 */
38 private TextParser textParser;
39
40 /**
41 * map used to create blocks dependening on the text format
42 */
43 private static final Map<String, FormatBlockFactory> FACTORY_MAP = new HashMap<String, FormatBlockFactory>();
44
45 /**
46 * creates bold blocks
47 */
48 private static final FormatBlockFactory BOLD_FACTORY = new FormatBlockFactory()
49 {
50 /** {@inheritDoc} */
51 public Block createBlock( final Block[] childrens )
52 {
53 return new BoldBlock( childrens );
54 }
55 };
56
57 /**
58 * creates italic blocks
59 */
60 private static final FormatBlockFactory ITALIC_FACTORY = new FormatBlockFactory()
61 {
62 /** {@inheritDoc} */
63 public Block createBlock( final Block[] childrens )
64 {
65 return new ItalicBlock( childrens );
66 }
67 };
68
69 /**
70 * creates monospaced blocks
71 */
72 private static final FormatBlockFactory MONOSPACED_FACTORY = new FormatBlockFactory()
73 {
74 /** {@inheritDoc} */
75 public Block createBlock( final Block[] childrens )
76 {
77 return new MonospaceBlock( childrens );
78 }
79 };
80
81 /**
82 * creates bold italic blocks
83 */
84 private static final FormatBlockFactory BOLDITALIC_FACTORY = new FormatBlockFactory()
85 {
86 /** {@inheritDoc} */
87 public Block createBlock( final Block[] childrens )
88 {
89 return new BoldBlock( new Block[] { new ItalicBlock( childrens ) } );
90 }
91 };
92
93 /**
94 * creates bold monospace blocks
95 */
96 private static final FormatBlockFactory BOLDMONO_FACTORY = new FormatBlockFactory()
97 {
98 /** {@inheritDoc} */
99 public Block createBlock( final Block[] childrens )
100 {
101 return new BoldBlock( new Block[] { new MonospaceBlock( childrens ) } );
102 }
103 };
104
105 /**
106 * format characters
107 */
108 private static final String[] SPECIAL_CHAR = new String[] { "__", "==", "*", "_", "=" };
109
110 static
111 {
112 FACTORY_MAP.put( "*", BOLD_FACTORY );
113 FACTORY_MAP.put( "_", ITALIC_FACTORY );
114 FACTORY_MAP.put( "=", MONOSPACED_FACTORY );
115 FACTORY_MAP.put( "__", BOLDITALIC_FACTORY );
116 FACTORY_MAP.put( "==", BOLDMONO_FACTORY );
117 }
118
119 /**
120 * @param line line to parse
121 * @return TextBlock, ItalicBlock, BoldBlock, MonospacedBlock, ...
122 */
123 final Block[] parse( final String line )
124 {
125 return parseFormat( line ).toArray( new Block[] {} );
126 }
127
128 /**
129 * @param c character to test
130 * @return <code>true</code> if c is a space character
131 */
132 static boolean isSpace( final char c )
133 {
134 return c == ' ' || c == '\t';
135 }
136
137 /**
138 * @param c character to test
139 * @return <code>true</code> if c is a character that limits the formats
140 */
141 static boolean isSpecial( final char c )
142 {
143 boolean ret = false;
144
145 for ( int i = 0; !ret && i < SPECIAL_CHAR.length; i++ )
146 {
147 if ( SPECIAL_CHAR[i].charAt( 0 ) == c )
148 {
149 ret = true;
150 }
151 }
152
153 return ret;
154 }
155
156 /**
157 * Parse text format (bold, italic...)
158 * <p/>
159 * TODO too many lines!!
160 *
161 * @param line line to parse
162 * @return list of blocks
163 */
164 private List<Block> parseFormat( final String line )
165 {
166 final List<Block> ret = new ArrayList<Block>();
167 final int[] lhOffsets = new int[SPECIAL_CHAR.length];
168 final int[] rhOffsets = new int[SPECIAL_CHAR.length];
169
170 // for each text format markers...
171 for ( int i = 0; i < SPECIAL_CHAR.length; i++ )
172 {
173 final int specialLen = SPECIAL_CHAR[i].length();
174 int t = 0;
175 // search the nearset instance of this marker...
176 while ( t != -1 && ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
177 {
178 // and check if it at the begining of a word.
179 if ( t == 0 || isSpace( line.charAt( t - 1 ) ) || isParenthesis( line.charAt( t - 1 ) ) )
180 {
181 // if it is, and if, check to avoid going beyond the string
182 if ( t + specialLen < line.length() )
183 {
184 // and if character after the format marker is another
185 // marker, is an error, and should be ignored
186 if ( isSpecial( line.charAt( t + specialLen ) ) )
187 {
188 t += specialLen;
189 }
190 else
191 {
192 // else we find a starter!
193 break;
194 }
195 }
196 else
197 {
198 t = -1;
199 }
200 }
201 else
202 {
203 t += specialLen;
204 }
205 }
206 lhOffsets[i] = t;
207 }
208
209 // for each text format markers...
210 for ( int i = 0; i < lhOffsets.length; i++ )
211 {
212 final int specialLen = SPECIAL_CHAR[i].length();
213 // if we found a text format beginning
214 if ( lhOffsets[i] != -1 )
215 {
216 int t = lhOffsets[i] + specialLen;
217 // search for a text format ending
218 while ( ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
219 {
220 // must be side by side to a word
221 final char c = line.charAt( t - 1 );
222 if ( t > 0 && !isSpace( c ) && !isSpecial( c ) )
223 {
224 break;
225 }
226 else
227 {
228 t += specialLen;
229 }
230 }
231 rhOffsets[i] = t;
232 }
233 }
234
235 // find the nearest index
236 int minIndex = -1;
237 int charType = 0;
238 for ( int i = 0; i < lhOffsets.length; i++ )
239 {
240 if ( lhOffsets[i] != -1 && rhOffsets[i] != 1 )
241 {
242 if ( minIndex == -1 || lhOffsets[i] < minIndex )
243 {
244 if ( rhOffsets[i] > lhOffsets[i] )
245 {
246 // ej: "mary *has a little lamb"
247 minIndex = lhOffsets[i];
248 charType = i;
249 }
250 }
251 }
252 }
253
254 if ( minIndex == -1 )
255 {
256 ret.addAll( textParser.parse( line ) );
257 }
258 else
259 {
260 int len = SPECIAL_CHAR[charType].length();
261 ret.addAll( parseFormat( line.substring( 0, minIndex ) ) );
262 ret.add( FACTORY_MAP.get( SPECIAL_CHAR[charType] )
263 .createBlock( parseFormat( line.substring( minIndex + len, rhOffsets[charType] ) )
264 .toArray( new Block[] {} ) ) );
265 ret.addAll( parseFormat( line.substring( rhOffsets[charType] + len ) ) );
266 }
267
268 // profit
269 return ret;
270 }
271
272 /**
273 * @param c character to test
274 * @return <code>true</code> if c is a parenthesis
275 */
276 private boolean isParenthesis( final char c )
277 {
278 return c == '(' || c == ')';
279 }
280
281 /**
282 * Sets the formatTextParser.
283 *
284 * @param textParser text parser to use
285 * <code>TextParser</code> with the formatTextParser.
286 */
287 public final void setTextParser( final TextParser textParser )
288 {
289 if ( textParser == null )
290 {
291 throw new IllegalArgumentException( "argument can't be null" );
292 }
293
294 this.textParser = textParser;
295 }
296 }
297
298 /**
299 * @author Juan F. Codagnone
300 * @version $Id: FormatedTextParser.java 1090706 2011-04-09 23:15:28Z hboutemy $
301 */
302 interface FormatBlockFactory
303 {
304 /**
305 * factory method of format <code>Block</code>
306 *
307 * @param childrens children of the format block
308 * @return a format block
309 */
310 Block createBlock( final Block[] childrens );
311 }