View Javadoc
1   package org.apache.maven.index.context;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.apache.lucene.analysis.Analyzer;
23  import org.apache.lucene.analysis.AnalyzerWrapper;
24  import org.apache.lucene.analysis.Tokenizer;
25  import org.apache.lucene.analysis.LowerCaseFilter;
26  import org.apache.lucene.analysis.util.CharTokenizer;
27  import org.apache.maven.index.creator.JarFileContentsIndexCreator;
28  
29  /**
30   * A Nexus specific analyzer. Only difference from Lucene's SimpleAnalyzer is that we use LetterOrDigitTokenizer instead
31   * of LowerCaseTokenizer. LetterOrDigitTokenizer does pretty much the same as LowerCaseTokenizer, it normalizes to lower
32   * case letter, but it takes letters and numbers too (as opposed to LowerCaseTokenizer) as token chars.
33   * 
34   * @author Eugene Kuleshov
35   * @author cstamas
36   */
37  public final class NexusAnalyzer
38      extends AnalyzerWrapper
39  {
40      private static final Analyzer CLASS_NAMES_ANALYZER = new Analyzer()
41      {
42          @Override
43          protected TokenStreamComponents createComponents( String fieldName )
44          {
45              final Tokenizer tokenizer = new DeprecatedClassnamesTokenizer();
46              return new TokenStreamComponents( tokenizer, new LowerCaseFilter( tokenizer ) );
47          }
48      };
49  
50      private static final Analyzer LETTER_OR_DIGIT_ANALYZER = new Analyzer()
51      {
52          @Override
53          protected TokenStreamComponents createComponents( String filedName )
54          {
55              final Tokenizer tokenizer = new LetterOrDigitTokenizer();
56              return new TokenStreamComponents( tokenizer, new LowerCaseFilter( tokenizer ) );
57          }
58      };
59  
60      public NexusAnalyzer()
61      {
62          super( PER_FIELD_REUSE_STRATEGY );
63      }
64  
65      @Override
66      protected Analyzer getWrappedAnalyzer( String fieldName )
67      {
68          if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( fieldName ) )
69          {
70              // To keep "backward" compatibility, we have to use old flawed tokenizer.
71              return CLASS_NAMES_ANALYZER;
72          }
73          else
74          {
75              return LETTER_OR_DIGIT_ANALYZER;
76          }
77      }
78  
79      // ==
80  
81      public static class NoopTokenizer
82          extends CharTokenizer
83      {
84          public NoopTokenizer()
85          {
86              super();
87          }
88  
89          @Override
90          protected boolean isTokenChar( int i )
91          {
92              return true;
93          }
94      }
95  
96      @Deprecated
97      public static class DeprecatedClassnamesTokenizer
98          extends CharTokenizer
99      {
100         public DeprecatedClassnamesTokenizer()
101         {
102             super();
103         }
104         
105         @Override
106         protected boolean isTokenChar( int i )
107         {
108             return i != '\n';
109         }
110     }
111 
112     public static class LetterOrDigitTokenizer
113         extends CharTokenizer
114     {
115         public LetterOrDigitTokenizer()
116         {
117             super();
118         }
119 
120         @Override
121         protected boolean isTokenChar( int c )
122         {
123             return Character.isLetterOrDigit( c );
124         }
125     }
126 }