1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedOutputStream;
23 import java.io.DataOutput;
24 import java.io.DataOutputStream;
25 import java.io.IOException;
26 import java.io.OutputStream;
27 import java.util.ArrayList;
28 import java.util.Date;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Set;
32 import java.util.zip.GZIPOutputStream;
33 import org.apache.lucene.document.Document;
34 import org.apache.lucene.document.Field;
35 import org.apache.lucene.index.IndexOptions;
36 import org.apache.lucene.index.IndexReader;
37 import org.apache.lucene.index.IndexableField;
38 import org.apache.lucene.index.MultiBits;
39 import org.apache.lucene.util.Bits;
40 import org.apache.maven.index.ArtifactInfo;
41 import org.apache.maven.index.IndexerField;
42 import org.apache.maven.index.context.DefaultIndexingContext;
43 import org.apache.maven.index.context.IndexingContext;
44
45
46
47
48
49
50 public class IndexDataWriter
51 {
52 static final int VERSION = 1;
53
54 static final int F_INDEXED = 1;
55
56 static final int F_TOKENIZED = 2;
57
58 static final int F_STORED = 4;
59
60 static final int F_COMPRESSED = 8;
61
62 private final DataOutputStream dos;
63
64 private final GZIPOutputStream gos;
65
66 private final BufferedOutputStream bos;
67
68 private final Set<String> allGroups;
69
70 private final Set<String> rootGroups;
71
72 private boolean descriptorWritten;
73
74 public IndexDataWriter( OutputStream os )
75 throws IOException
76 {
77 bos = new BufferedOutputStream( os, 1024 * 8 );
78 gos = new GZIPOutputStream( bos, 1024 * 2 );
79 dos = new DataOutputStream( gos );
80
81 this.allGroups = new HashSet<>();
82 this.rootGroups = new HashSet<>();
83 this.descriptorWritten = false;
84 }
85
86 public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes )
87 throws IOException
88 {
89 writeHeader( context );
90
91 int n = writeDocuments( indexReader, docIndexes );
92
93 writeGroupFields();
94
95 close();
96
97 return n;
98 }
99
100 public void close()
101 throws IOException
102 {
103 dos.flush();
104
105 gos.flush();
106 gos.finish();
107
108 bos.flush();
109 }
110
111 public void writeHeader( IndexingContext context )
112 throws IOException
113 {
114 dos.writeByte( VERSION );
115
116 Date timestamp = context.getTimestamp();
117 dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
118 }
119
120 public void writeGroupFields()
121 throws IOException
122 {
123 {
124 List<IndexableField> allGroupsFields = new ArrayList<>( 2 );
125 allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE,
126 IndexerField.KEYWORD_STORED ) );
127 allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ),
128 IndexerField.KEYWORD_STORED ) );
129 writeDocumentFields( allGroupsFields );
130 }
131
132 {
133 List<IndexableField> rootGroupsFields = new ArrayList<>( 2 );
134 rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE,
135 IndexerField.KEYWORD_STORED ) );
136 rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ),
137 IndexerField.KEYWORD_STORED ) );
138 writeDocumentFields( rootGroupsFields );
139 }
140 }
141
142 public int writeDocuments( IndexReader r, List<Integer> docIndexes )
143 throws IOException
144 {
145 int n = 0;
146 Bits liveDocs = MultiBits.getLiveDocs( r );
147
148 if ( docIndexes == null )
149 {
150 for ( int i = 0; i < r.maxDoc(); i++ )
151 {
152 if ( liveDocs == null || liveDocs.get( i ) )
153 {
154 if ( writeDocument( r.document( i ) ) )
155 {
156 n++;
157 }
158 }
159 }
160 }
161 else
162 {
163 for ( int i : docIndexes )
164 {
165 if ( liveDocs == null || liveDocs.get( i ) )
166 {
167 if ( writeDocument( r.document( i ) ) )
168 {
169 n++;
170 }
171 }
172 }
173 }
174
175 return n;
176 }
177
178 public boolean writeDocument( final Document document )
179 throws IOException
180 {
181 List<IndexableField> fields = document.getFields();
182
183 List<IndexableField> storedFields = new ArrayList<>( fields.size() );
184
185 for ( IndexableField field : fields )
186 {
187 if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
188 {
189 if ( descriptorWritten )
190 {
191 return false;
192 }
193 else
194 {
195 descriptorWritten = true;
196 }
197 }
198
199 if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) )
200 {
201 final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST );
202
203 if ( groupList != null && groupList.trim().length() > 0 )
204 {
205 allGroups.addAll( ArtifactInfo.str2lst( groupList ) );
206 }
207
208 return false;
209 }
210
211 if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) )
212 {
213 final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST );
214
215 if ( groupList != null && groupList.trim().length() > 0 )
216 {
217 rootGroups.addAll( ArtifactInfo.str2lst( groupList ) );
218 }
219
220 return false;
221 }
222
223 if ( field.fieldType().stored() )
224 {
225 storedFields.add( field );
226 }
227 }
228
229 writeDocumentFields( storedFields );
230
231 return true;
232 }
233
234 public void writeDocumentFields( List<IndexableField> fields )
235 throws IOException
236 {
237 dos.writeInt( fields.size() );
238
239 for ( IndexableField field : fields )
240 {
241 writeField( field );
242 }
243 }
244
245 public void writeField( IndexableField field )
246 throws IOException
247 {
248 int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE ? F_INDEXED : 0 )
249 + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 )
250 + ( field.fieldType().stored() ? F_STORED : 0 );
251
252
253 String name = field.name();
254 String value = field.stringValue();
255
256 dos.write( flags );
257 dos.writeUTF( name );
258 writeUTF( value, dos );
259 }
260
261 private static void writeUTF( String str, DataOutput out )
262 throws IOException
263 {
264 int strlen = str.length();
265 int utflen = 0;
266 int c;
267
268
269 for ( int i = 0; i < strlen; i++ )
270 {
271 c = str.charAt( i );
272 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
273 {
274 utflen++;
275 }
276 else if ( c > 0x07FF )
277 {
278 utflen += 3;
279 }
280 else
281 {
282 utflen += 2;
283 }
284 }
285
286
287 out.writeInt( utflen );
288
289 byte[] bytearr = new byte[utflen];
290
291 int count = 0;
292
293 int i = 0;
294 for ( ; i < strlen; i++ )
295 {
296 c = str.charAt( i );
297 if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
298 {
299 break;
300 }
301 bytearr[count++] = (byte) c;
302 }
303
304 for ( ; i < strlen; i++ )
305 {
306 c = str.charAt( i );
307 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
308 {
309 bytearr[count++] = (byte) c;
310
311 }
312 else if ( c > 0x07FF )
313 {
314 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
315 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
316 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
317 }
318 else
319 {
320 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
321 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
322 }
323 }
324
325 out.write( bytearr, 0, utflen );
326 }
327
328 }