1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedOutputStream;
23 import java.io.DataOutput;
24 import java.io.DataOutputStream;
25 import java.io.IOException;
26 import java.io.OutputStream;
27 import java.util.ArrayList;
28 import java.util.Date;
29 import java.util.List;
30 import java.util.zip.GZIPOutputStream;
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.Field;
33 import org.apache.lucene.document.StoredField;
34 import org.apache.lucene.index.IndexOptions;
35 import org.apache.lucene.index.IndexReader;
36 import org.apache.lucene.index.IndexableField;
37 import org.apache.lucene.index.MultiBits;
38 import org.apache.lucene.util.Bits;
39 import org.apache.maven.index.ArtifactInfo;
40 import org.apache.maven.index.IndexerField;
41 import org.apache.maven.index.context.DefaultIndexingContext;
42 import org.apache.maven.index.context.IndexingContext;
43
44
45
46
47
48
49 public class IndexDataWriter
50 {
51 static final int VERSION = 1;
52
53 static final int F_INDEXED = 1;
54
55 static final int F_TOKENIZED = 2;
56
57 static final int F_STORED = 4;
58
59 static final int F_COMPRESSED = 8;
60
61 private final DataOutputStream dos;
62
63 private final GZIPOutputStream gos;
64
65 private final BufferedOutputStream bos;
66
67 private boolean descriptorWritten;
68
69 public IndexDataWriter( OutputStream os )
70 throws IOException
71 {
72 bos = new BufferedOutputStream( os, 1024 * 8 );
73 gos = new GZIPOutputStream( bos, 1024 * 2 );
74 dos = new DataOutputStream( gos );
75
76 this.descriptorWritten = false;
77 }
78
79 public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes )
80 throws IOException
81 {
82 writeHeader( context );
83
84 int n = writeDocuments( indexReader, docIndexes );
85
86 writeGroupFields( context );
87
88 close();
89
90 return n;
91 }
92
93 public void close()
94 throws IOException
95 {
96 dos.flush();
97
98 gos.flush();
99 gos.finish();
100
101 bos.flush();
102 }
103
104 public void writeHeader( IndexingContext context )
105 throws IOException
106 {
107 dos.writeByte( VERSION );
108
109 Date timestamp = context.getTimestamp();
110 dos.writeLong( timestamp == null ? -1 : timestamp.getTime() );
111 }
112
113 public void writeGroupFields( IndexingContext context )
114 throws IOException
115 {
116 {
117 List<IndexableField> allGroupsFields = new ArrayList<>( 2 );
118 allGroupsFields.add( new Field( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE,
119 IndexerField.KEYWORD_STORED ) );
120 allGroupsFields.add( new StoredField( ArtifactInfo.ALL_GROUPS_LIST,
121 ArtifactInfo.lst2str( context.getAllGroups() ), IndexerField.KEYWORD_STORED ) );
122 writeDocumentFields( allGroupsFields );
123 }
124
125 {
126 List<IndexableField> rootGroupsFields = new ArrayList<>( 2 );
127 rootGroupsFields.add( new Field( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE,
128 IndexerField.KEYWORD_STORED ) );
129 rootGroupsFields.add( new StoredField( ArtifactInfo.ROOT_GROUPS_LIST,
130 ArtifactInfo.lst2str( context.getRootGroups() ), IndexerField.KEYWORD_STORED ) );
131 writeDocumentFields( rootGroupsFields );
132 }
133 }
134
135 public int writeDocuments( IndexReader r, List<Integer> docIndexes )
136 throws IOException
137 {
138 int n = 0;
139 Bits liveDocs = MultiBits.getLiveDocs( r );
140
141 if ( docIndexes == null )
142 {
143 for ( int i = 0; i < r.maxDoc(); i++ )
144 {
145 if ( liveDocs == null || liveDocs.get( i ) )
146 {
147 if ( writeDocument( r.document( i ) ) )
148 {
149 n++;
150 }
151 }
152 }
153 }
154 else
155 {
156 for ( int i : docIndexes )
157 {
158 if ( liveDocs == null || liveDocs.get( i ) )
159 {
160 if ( writeDocument( r.document( i ) ) )
161 {
162 n++;
163 }
164 }
165 }
166 }
167
168 return n;
169 }
170
171 public boolean writeDocument( final Document document )
172 throws IOException
173 {
174 List<IndexableField> fields = document.getFields();
175
176 List<IndexableField> storedFields = new ArrayList<>( fields.size() );
177
178 for ( IndexableField field : fields )
179 {
180 if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) )
181 {
182 if ( descriptorWritten )
183 {
184 return false;
185 }
186 else
187 {
188 descriptorWritten = true;
189 }
190 }
191
192 if ( field.fieldType().stored() )
193 {
194 storedFields.add( field );
195 }
196 }
197
198 writeDocumentFields( storedFields );
199
200 return true;
201 }
202
203 public void writeDocumentFields( List<IndexableField> fields )
204 throws IOException
205 {
206 dos.writeInt( fields.size() );
207
208 for ( IndexableField field : fields )
209 {
210 writeField( field );
211 }
212 }
213
214 public void writeField( IndexableField field )
215 throws IOException
216 {
217 int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE ? F_INDEXED : 0 )
218 + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 )
219 + ( field.fieldType().stored() ? F_STORED : 0 );
220
221
222 String name = field.name();
223 String value = field.stringValue();
224
225 dos.write( flags );
226 dos.writeUTF( name );
227 writeUTF( value, dos );
228 }
229
230 private static void writeUTF( String str, DataOutput out )
231 throws IOException
232 {
233 int strlen = str.length();
234 int utflen = 0;
235 int c;
236
237
238 for ( int i = 0; i < strlen; i++ )
239 {
240 c = str.charAt( i );
241 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
242 {
243 utflen++;
244 }
245 else if ( c > 0x07FF )
246 {
247 utflen += 3;
248 }
249 else
250 {
251 utflen += 2;
252 }
253 }
254
255
256 out.writeInt( utflen );
257
258 byte[] bytearr = new byte[utflen];
259
260 int count = 0;
261
262 int i = 0;
263 for ( ; i < strlen; i++ )
264 {
265 c = str.charAt( i );
266 if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) )
267 {
268 break;
269 }
270 bytearr[count++] = (byte) c;
271 }
272
273 for ( ; i < strlen; i++ )
274 {
275 c = str.charAt( i );
276 if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
277 {
278 bytearr[count++] = (byte) c;
279
280 }
281 else if ( c > 0x07FF )
282 {
283 bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
284 bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
285 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
286 }
287 else
288 {
289 bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
290 bytearr[count++] = (byte) ( 0x80 | ( ( c ) & 0x3F ) );
291 }
292 }
293
294 out.write( bytearr, 0, utflen );
295 }
296
297 }