1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedOutputStream;
23 import java.io.DataOutput;
24 import java.io.DataOutputStream;
25 import java.io.IOException;
26 import java.io.OutputStream;
27 import java.util.ArrayList;
28 import java.util.Date;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Set;
32 import java.util.zip.GZIPOutputStream;
33
34 import org.apache.lucene.document.Document;
35 import org.apache.lucene.document.Field;
36 import org.apache.lucene.document.Fieldable;
37 import org.apache.lucene.index.IndexReader;
38 import org.apache.lucene.search.IndexSearcher;
39 import org.apache.maven.index.ArtifactInfo;
40 import org.apache.maven.index.context.DefaultIndexingContext;
41 import org.apache.maven.index.context.IndexingContext;
42
43
44
45
46
47
48 public class IndexDataWriter
49 {
50 static final int VERSION = 1;
51
52 static final int F_INDEXED = 1;
53
54 static final int F_TOKENIZED = 2;
55
56 static final int F_STORED = 4;
57
58 static final int F_COMPRESSED = 8;
59
60 private final DataOutputStream dos;
61
62 private final GZIPOutputStream gos;
63
64 private final BufferedOutputStream bos;
65
66 private final Set<String> allGroups;
67
68 private final Set<String> rootGroups;
69
70 private boolean descriptorWritten;
71
72 public IndexDataWriter( OutputStream os )
73 throws IOException
74 {
75 bos = new BufferedOutputStream( os, 1024 * 8 );
76 gos = new GZIPOutputStream( bos, 1024 * 2 );
77 dos = new DataOutputStream( gos );
78
79 this.allGroups = new HashSet<String>();
80 this.rootGroups = new HashSet<String>();
81 this.descriptorWritten = false;
82 }
83
84 public int write( IndexingContext context, List<Integer> docIndexes )
85 throws IOException
86 {
87 writeHeader( context );
88
89 int n = 0;
90 final IndexSearcher indexSearcher = context.acquireIndexSearcher();
91 try
92 {
93 n = writeDocuments( indexSearcher.getIndexReader(), docIndexes );
94 }
95 finally
96 {
97 context.releaseIndexSearcher( indexSearcher );
98 }
99
100 writeGroupFields();
101
102 close();
103
104 return n;
105 }
106
107 public void close()
108 throws IOException
109 {
110 dos.flush();
111
112 gos.flush();
113 gos.finish();
114
115 bos.flush();
116 }
117
118 public void writeHeader( IndexingContext context )
119 throws IOException
120 {
121 dos.writeByte( VERSION );
122
123 Date timestamp = context.getTimestamp();
124 dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
125 }
126
127 public void writeGroupFields()
128 throws IOException
129 {
130 {
131 List<Fieldable> allGroupsFields = new ArrayList<Fieldable>( 2 );
132 allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Field.Store.YES,
133 Field.Index.NOT_ANALYZED ) );
134 allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
135 Field.Store.YES, Field.Index.NO ) );
136 writeDocumentFields( allGroupsFields );
137 }
138
139 {
140 List<Fieldable> rootGroupsFields = new ArrayList<Fieldable>( 2 );
141 rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, Field.Store.YES,
142 Field.Index.NOT_ANALYZED ) );
143 rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
144 Field.Store.YES, Field.Index.NO ) );
145 writeDocumentFields( rootGroupsFields );
146 }
147 }
148
149 public int writeDocuments( IndexReader r, List<Integer> docIndexes )
150 throws IOException
151 {
152 int n = 0;
153
154 if ( docIndexes == null )
155 {
156 for ( int i = 0; i < r.maxDoc(); i++ )
157 {
158 if ( !r.isDeleted( i ) )
159 {
160 if ( writeDocument( r.document( i ) ) )
161 {
162 n++;
163 }
164 }
165 }
166 }
167 else
168 {
169 for ( int i : docIndexes )
170 {
171 if ( !r.isDeleted( i ) )
172 {
173 if ( writeDocument( r.document( i ) ) )
174 {
175 n++;
176 }
177 }
178 }
179 }
180
181 return n;
182 }
183
184 public boolean writeDocument( final Document document )
185 throws IOException
186 {
187 List<Fieldable> fields = document.getFields();
188
189 List<Fieldable> storedFields = new ArrayList<Fieldable>( fields.size() );
190
191 for ( Fieldable field : fields )
192 {
193 if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
194 {
195 if ( descriptorWritten )
196 {
197 return false;
198 }
199 else
200 {
201 descriptorWritten = true;
202 }
203 }
204
205 if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
206 {
207 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
208
209 if ( groupList != null && groupList.trim().length() > 0 )
210 {
211 allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
212 }
213
214 return false;
215 }
216
217 if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
218 {
219 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
220
221 if ( groupList != null && groupList.trim().length() > 0 )
222 {
223 rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
224 }
225
226 return false;
227 }
228
229 if ( field.isStored() )
230 {
231 storedFields.add( field );
232 }
233 }
234
235 writeDocumentFields( storedFields );
236
237 return true;
238 }
239
240 public void writeDocumentFields( List<Fieldable> fields )
241 throws IOException
242 {
243 dos.writeInt( fields.size() );
244
245 for ( Fieldable field : fields )
246 {
247 writeField( field );
248 }
249 }
250
251 public void writeField( Fieldable field )
252 throws IOException
253 {
254 int flags = ( field.isIndexed() ? F_INDEXED : 0 )
255 + ( field.isTokenized() ? F_TOKENIZED : 0 )
256 + ( field.isStored() ? F_STORED : 0 );
257
258
259 String name = field.name();
260 String value = field.stringValue();
261
262 dos.write( flags );
263 dos.writeUTF( name );
264 writeUTF( value, dos );
265 }
266
267 private static void writeUTF( String str, DataOutput out )
268 throws IOException
269 {
270 int strlen = str.length();
271 int utflen = 0;
272 int c;
273
274
275 for ( int i = 0; i < strlen; i++ )
276 {
277 c = str.charAt( i );
278 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
279 {
280 utflen++;
281 }
282 else if ( c > 0x07FF )
283 {
284 utflen += 3;
285 }
286 else
287 {
288 utflen += 2;
289 }
290 }
291
292
293 out.writeInt( utflen );
294
295 byte[] bytearr = new byte[utflen];
296
297 int count = 0;
298
299 int i = 0;
300 for ( ; i < strlen; i++ )
301 {
302 c = str.charAt( i );
303 if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
304 {
305 break;
306 }
307 bytearr[count++] = (byte) c;
308 }
309
310 for ( ; i < strlen; i++ )
311 {
312 c = str.charAt( i );
313 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
314 {
315 bytearr[count++] = (byte) c;
316
317 }
318 else if ( c > 0x07FF )
319 {
320 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
321 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
322 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
323 }
324 else
325 {
326 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
327 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) );
328 }
329 }
330
331 out.write( bytearr, 0, utflen );
332 }
333
334 }