View Javadoc

1   package org.apache.maven.doxia.module.twiki.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  /**
28   * Parse looking for formated text (bold, italic, ...)
29   *
30   * @author Juan F. Codagnone
31   * @version $Id: FormatedTextParser.java 1090706 2011-04-09 23:15:28Z hboutemy $
32   */
33  public class FormatedTextParser
34  {
35      /**
36       * parser used to parse text...
37       */
38      private TextParser textParser;
39  
40      /**
41       * map used to create blocks dependening on the text format
42       */
43      private static final Map<String, FormatBlockFactory> FACTORY_MAP = new HashMap<String, FormatBlockFactory>();
44  
45      /**
46       * creates bold blocks
47       */
48      private static final FormatBlockFactory BOLD_FACTORY = new FormatBlockFactory()
49      {
50          /** {@inheritDoc} */
51          public Block createBlock( final Block[] childrens )
52          {
53              return new BoldBlock( childrens );
54          }
55      };
56  
57      /**
58       * creates italic blocks
59       */
60      private static final FormatBlockFactory ITALIC_FACTORY = new FormatBlockFactory()
61      {
62          /** {@inheritDoc} */
63          public Block createBlock( final Block[] childrens )
64          {
65              return new ItalicBlock( childrens );
66          }
67      };
68  
69      /**
70       * creates monospaced blocks
71       */
72      private static final FormatBlockFactory MONOSPACED_FACTORY = new FormatBlockFactory()
73      {
74          /** {@inheritDoc} */
75          public Block createBlock( final Block[] childrens )
76          {
77              return new MonospaceBlock( childrens );
78          }
79      };
80  
81      /**
82       * creates bold italic blocks
83       */
84      private static final FormatBlockFactory BOLDITALIC_FACTORY = new FormatBlockFactory()
85      {
86          /** {@inheritDoc} */
87          public Block createBlock( final Block[] childrens )
88          {
89              return new BoldBlock( new Block[] { new ItalicBlock( childrens ) } );
90          }
91      };
92  
93      /**
94       * creates bold monospace blocks
95       */
96      private static final FormatBlockFactory BOLDMONO_FACTORY = new FormatBlockFactory()
97      {
98          /** {@inheritDoc} */
99          public Block createBlock( final Block[] childrens )
100         {
101             return new BoldBlock( new Block[] { new MonospaceBlock( childrens ) } );
102         }
103     };
104 
105     /**
106      * format characters
107      */
108     private static final String[] SPECIAL_CHAR = new String[] { "__", "==", "*", "_", "=" };
109 
110     static
111     {
112         FACTORY_MAP.put( "*", BOLD_FACTORY );
113         FACTORY_MAP.put( "_", ITALIC_FACTORY );
114         FACTORY_MAP.put( "=", MONOSPACED_FACTORY );
115         FACTORY_MAP.put( "__", BOLDITALIC_FACTORY );
116         FACTORY_MAP.put( "==", BOLDMONO_FACTORY );
117     }
118 
119     /**
120      * @param line line to parse
121      * @return TextBlock, ItalicBlock, BoldBlock, MonospacedBlock, ...
122      */
123     final Block[] parse( final String line )
124     {
125         return parseFormat( line ).toArray( new Block[] {} );
126     }
127 
128     /**
129      * @param c character to test
130      * @return <code>true</code> if c is a space character
131      */
132     static boolean isSpace( final char c )
133     {
134         return c == ' ' || c == '\t';
135     }
136 
137     /**
138      * @param c character to test
139      * @return <code>true</code> if c is a character that limits the formats
140      */
141     static boolean isSpecial( final char c )
142     {
143         boolean ret = false;
144 
145         for ( int i = 0; !ret && i < SPECIAL_CHAR.length; i++ )
146         {
147             if ( SPECIAL_CHAR[i].charAt( 0 ) == c )
148             {
149                 ret = true;
150             }
151         }
152 
153         return ret;
154     }
155 
156     /**
157      * Parse text format (bold, italic...)
158      * <p/>
159      * TODO too many lines!!
160      *
161      * @param line line to parse
162      * @return list of blocks
163      */
164     private List<Block> parseFormat( final String line )
165     {
166         final List<Block> ret = new ArrayList<Block>();
167         final int[] lhOffsets = new int[SPECIAL_CHAR.length];
168         final int[] rhOffsets = new int[SPECIAL_CHAR.length];
169 
170         // for each text format markers...
171         for ( int i = 0; i < SPECIAL_CHAR.length; i++ )
172         {
173             final int specialLen = SPECIAL_CHAR[i].length();
174             int t = 0;
175             // search the nearset instance of this marker...
176             while ( t != -1 && ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
177             {
178                 // and check if it at the begining of a word.
179                 if ( t == 0 || isSpace( line.charAt( t - 1 ) ) || isParenthesis( line.charAt( t - 1 ) ) )
180                 {
181                     // if it is, and if, check to avoid going beyond the string
182                     if ( t + specialLen < line.length() )
183                     {
184                         // and if character after the format marker is another
185                         // marker, is an error, and should be ignored
186                         if ( isSpecial( line.charAt( t + specialLen ) ) )
187                         {
188                             t += specialLen;
189                         }
190                         else
191                         {
192                             // else we find a starter!
193                             break;
194                         }
195                     }
196                     else
197                     {
198                         t = -1;
199                     }
200                 }
201                 else
202                 {
203                     t += specialLen;
204                 }
205             }
206             lhOffsets[i] = t;
207         }
208 
209         // for each text format markers...
210         for ( int i = 0; i < lhOffsets.length; i++ )
211         {
212             final int specialLen = SPECIAL_CHAR[i].length();
213             // if we found a text format beginning
214             if ( lhOffsets[i] != -1 )
215             {
216                 int t = lhOffsets[i] + specialLen;
217                 // search for a text format ending
218                 while ( ( t = line.indexOf( SPECIAL_CHAR[i], t ) ) != -1 )
219                 {
220                     // must be side by side to a word
221                     final char c = line.charAt( t - 1 );
222                     if ( t > 0 && !isSpace( c ) && !isSpecial( c ) )
223                     {
224                         break;
225                     }
226                     else
227                     {
228                         t += specialLen;
229                     }
230                 }
231                 rhOffsets[i] = t;
232             }
233         }
234 
235         // find the nearest index
236         int minIndex = -1;
237         int charType = 0;
238         for ( int i = 0; i < lhOffsets.length; i++ )
239         {
240             if ( lhOffsets[i] != -1 && rhOffsets[i] != 1 )
241             {
242                 if ( minIndex == -1 || lhOffsets[i] < minIndex )
243                 {
244                     if ( rhOffsets[i] > lhOffsets[i] )
245                     {
246                         // ej: "mary *has a little lamb"
247                         minIndex = lhOffsets[i];
248                         charType = i;
249                     }
250                 }
251             }
252         }
253 
254         if ( minIndex == -1 )
255         {
256             ret.addAll( textParser.parse( line ) );
257         }
258         else
259         {
260             int len = SPECIAL_CHAR[charType].length();
261             ret.addAll( parseFormat( line.substring( 0, minIndex ) ) );
262             ret.add( FACTORY_MAP.get( SPECIAL_CHAR[charType] )
263                      .createBlock( parseFormat( line.substring( minIndex + len, rhOffsets[charType] ) )
264                                    .toArray( new Block[] {} ) ) );
265             ret.addAll( parseFormat( line.substring( rhOffsets[charType] + len ) ) );
266         }
267 
268         // profit
269         return ret;
270     }
271 
272     /**
273      * @param c character to test
274      * @return <code>true</code> if c is a parenthesis
275      */
276     private boolean isParenthesis( final char c )
277     {
278         return c == '(' || c == ')';
279     }
280 
281     /**
282      * Sets the formatTextParser.
283      *
284      * @param textParser text parser to use
285      *                   <code>TextParser</code> with the formatTextParser.
286      */
287     public final void setTextParser( final TextParser textParser )
288     {
289         if ( textParser == null )
290         {
291             throw new IllegalArgumentException( "argument can't be null" );
292         }
293 
294         this.textParser = textParser;
295     }
296 }
297 
298 /**
299  * @author Juan F. Codagnone
300  * @version $Id: FormatedTextParser.java 1090706 2011-04-09 23:15:28Z hboutemy $
301  */
302 interface FormatBlockFactory
303 {
304     /**
305      * factory method of format <code>Block</code>
306      *
307      * @param childrens children of the format block
308      * @return a format block
309      */
310     Block createBlock( final Block[] childrens );
311 }