1 package org.apache.maven.index.context;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.io.Reader;
23
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.CharTokenizer;
26 import org.apache.lucene.analysis.TokenStream;
27 import org.apache.lucene.analysis.Tokenizer;
28 import org.apache.maven.index.creator.JarFileContentsIndexCreator;
29
30 /**
31 * A Nexus specific analyzer. Only difference from Lucene's SimpleAnalyzer is that we use LetterOrDigitTokenizer instead
32 * of LowerCaseTokenizer. LetterOrDigitTokenizer does pretty much the same as LowerCaseTokenizer, it normalizes to lower
33 * case letter, but it takes letters and numbers too (as opposed to LowerCaseTokenizer) as token chars.
34 *
35 * @author Eugene Kuleshov
36 * @author cstamas
37 */
38 public final class NexusAnalyzer
39 extends Analyzer
40 {
41 public TokenStream tokenStream( String fieldName, Reader reader )
42 {
43 return getTokenizer( fieldName, reader );
44 }
45
46 protected Tokenizer getTokenizer( String fieldName, Reader reader )
47 {
48 if ( JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals( fieldName ) )
49 {
50 // To keep "backward" compatibility, we have to use old flawed tokenizer.
51 return new DeprecatedClassnamesTokenizer( reader );
52 }
53 else
54 {
55 return new LetterOrDigitTokenizer( reader );
56 }
57 }
58
59 // ==
60
61 public static class NoopTokenizer
62 extends CharTokenizer
63 {
64 public NoopTokenizer( Reader in )
65 {
66 super( in );
67 }
68
69 @Override
70 protected boolean isTokenChar( char c )
71 {
72 return true;
73 }
74 }
75
76 @Deprecated
77 public static class DeprecatedClassnamesTokenizer
78 extends CharTokenizer
79 {
80 public DeprecatedClassnamesTokenizer( Reader in )
81 {
82 super( in );
83 }
84
85 @Override
86 protected boolean isTokenChar( char c )
87 {
88 return c != '\n';
89 }
90
91 @Override
92 protected char normalize( char c )
93 {
94 return Character.toLowerCase( c );
95 }
96 }
97
98 public static class LetterOrDigitTokenizer
99 extends CharTokenizer
100 {
101 public LetterOrDigitTokenizer( Reader in )
102 {
103 super( in );
104 }
105
106 @Override
107 protected boolean isTokenChar( char c )
108 {
109 return Character.isLetterOrDigit( c );
110 }
111
112 @Override
113 protected char normalize( char c )
114 {
115 return Character.toLowerCase( c );
116 }
117 }
118
119 }