View Javadoc

1   package org.apache.maven.index.context;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.Reader;
23  
24  import org.apache.lucene.analysis.Analyzer;
25  import org.apache.lucene.analysis.CharTokenizer;
26  import org.apache.lucene.analysis.TokenStream;
27  import org.apache.lucene.analysis.Tokenizer;
28  import org.apache.maven.index.creator.JarFileContentsIndexCreator;
29  
30  /**
31   * A Nexus specific analyzer. Only difference from Lucene's SimpleAnalyzer is that we use LetterOrDigitTokenizer instead
32   * of LowerCaseTokenizer. LetterOrDigitTokenizer does pretty much the same as LowerCaseTokenizer, it normalizes to lower
33   * case letter, but it takes letters and numbers too (as opposed to LowerCaseTokenizer) as token chars.
34   * 
35   * @author Eugene Kuleshov
36   * @author cstamas
37   */
38  public final class NexusAnalyzer
39      extends Analyzer
40  {
41      public TokenStream tokenStream( String fieldName, Reader reader )
42      {
43          return getTokenizer( fieldName, reader );
44      }
45  
46      protected Tokenizer getTokenizer( String fieldName, Reader reader )
47      {
48          if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( fieldName ) )
49          {
50              // To keep "backward" compatibility, we have to use old flawed tokenizer.
51              return new DeprecatedClassnamesTokenizer( reader );
52          }
53          else
54          {
55              return new LetterOrDigitTokenizer( reader );
56          }
57      }
58  
59      // ==
60  
61      public static class NoopTokenizer
62          extends CharTokenizer
63      {
64          public NoopTokenizer( Reader in )
65          {
66              super( in );
67          }
68  
69          @Override
70          protected boolean isTokenChar( char c )
71          {
72              return true;
73          }
74      }
75  
76      @Deprecated
77      public static class DeprecatedClassnamesTokenizer
78          extends CharTokenizer
79      {
80          public DeprecatedClassnamesTokenizer( Reader in )
81          {
82              super( in );
83          }
84  
85          @Override
86          protected boolean isTokenChar( char c )
87          {
88              return c != '\n';
89          }
90  
91          @Override
92          protected char normalize( char c )
93          {
94              return Character.toLowerCase( c );
95          }
96      }
97  
98      public static class LetterOrDigitTokenizer
99          extends CharTokenizer
100     {
101         public LetterOrDigitTokenizer( Reader in )
102         {
103             super( in );
104         }
105 
106         @Override
107         protected boolean isTokenChar( char c )
108         {
109             return Character.isLetterOrDigit( c );
110         }
111 
112         @Override
113         protected char normalize( char c )
114         {
115             return Character.toLowerCase( c );
116         }
117     }
118 
119 }