1 package org.apache.maven.scm.provider.git.gitexe.command.changelog;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import org.apache.maven.scm.ChangeFile;
23 import org.apache.maven.scm.ChangeSet;
24 import org.apache.maven.scm.ScmFileStatus;
25 import org.apache.maven.scm.log.ScmLogger;
26 import org.apache.maven.scm.util.AbstractConsumer;
27
28 import java.util.ArrayList;
29 import java.util.Calendar;
30 import java.util.Date;
31 import java.util.List;
32 import java.util.Locale;
33 import java.util.TimeZone;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36
37 /**
38 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
39 * @author Olivier Lamy
40 *
41 */
42 public class GitChangeLogConsumer
43 extends AbstractConsumer
44 {
45 /**
46 * Date formatter for git timestamp
47 * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
48 */
49 private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
50
51 /**
52 * State machine constant: expecting header
53 */
54 private static final int STATUS_GET_HEADER = 1;
55
56 /**
57 * State machine constant: expecting author information
58 */
59 private static final int STATUS_GET_AUTHOR = 2;
60
61 /**
62 * State machine constant: expecting parent hash information
63 */
64 private static final int STATUS_RAW_TREE = 21;
65
66 /**
67 * State machine constant: expecting parent hash information
68 */
69 private static final int STATUS_RAW_PARENT = 22;
70
71 /**
72 * State machine constant: expecting author name, email and timestamp information
73 */
74 private static final int STATUS_RAW_AUTHOR = 23;
75
76 /**
77 * State machine constant: expecting committer name, email and timestamp information
78 */
79 private static final int STATUS_RAW_COMMITTER = 24;
80
81 /**
82 * State machine constant: expecting date information
83 */
84 private static final int STATUS_GET_DATE = 3;
85
86 /**
87 * State machine constant: expecting file information
88 */
89 private static final int STATUS_GET_FILE = 4;
90
91 /**
92 * State machine constant: expecting comments
93 */
94 private static final int STATUS_GET_COMMENT = 5;
95
96 /**
97 * The pattern used to match git header lines
98 */
99 private static final Pattern HEADER_PATTERN = Pattern.compile( "^commit (.*)" );
100
101 /**
102 * The pattern used to match git author lines
103 */
104 private static final Pattern AUTHOR_PATTERN = Pattern.compile( "^Author: (.*)" );
105
106 /**
107 * The pattern used to match git tree hash lines (raw mode)
108 */
109 private static final Pattern RAW_TREE_PATTERN = Pattern.compile( "^tree ([A-Fa-f0-9]+)" );
110
111 /**
112 * The pattern used to match git parent hash lines (raw mode)
113 */
114 private static final Pattern RAW_PARENT_PATTERN = Pattern.compile( "^parent ([A-Fa-f0-9]+)" );
115
116 /**
117 * The pattern used to match git author lines (raw mode)
118 */
119 private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile( "^author (.+ <.+>) ([0-9]+) (.*)" );
120
121 /**
122 * The pattern used to match git author lines (raw mode)
123 */
124 private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile( "^committer (.+ <.+>) ([0-9]+) (.*)" );
125
126 /**
127 * The pattern used to match git date lines
128 */
129 private static final Pattern DATE_PATTERN = Pattern.compile( "^Date:\\s*(.*)" );
130
131 /**
132 * The pattern used to match git file lines
133 */
134 private static final Pattern FILE_PATTERN =
135 Pattern.compile( "^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?" );
136
137 /**
138 * Current status of the parser
139 */
140 private int status = STATUS_GET_HEADER;
141
142 /**
143 * List of change log entries
144 */
145 private List<ChangeSet> entries = new ArrayList<ChangeSet>();
146
147 /**
148 * The current log entry being processed by the parser
149 */
150 private ChangeSet currentChange;
151
152 /**
153 * The current revision of the entry being processed by the parser
154 */
155 private String currentRevision;
156
157 /**
158 * The current comment of the entry being processed by the parser
159 */
160 private StringBuilder currentComment;
161
162 private String userDateFormat;
163
164 /**
165 * Default constructor.
166 */
167 public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
168 {
169 super( logger );
170
171 this.userDateFormat = userDateFormat;
172 }
173
174 public List<ChangeSet> getModifications()
175 {
176 // this is needed since the processFile does not always get a the end-sequence correctly.
177 processGetFile( "" );
178
179 return entries;
180 }
181
182 // ----------------------------------------------------------------------
183 // StreamConsumer Implementation
184 // ----------------------------------------------------------------------
185
186 /**
187 * {@inheritDoc}
188 */
189 public void consumeLine( String line )
190 {
191 switch ( status )
192 {
193 case STATUS_GET_HEADER:
194 processGetHeader( line );
195 break;
196 case STATUS_GET_AUTHOR:
197 processGetAuthor( line );
198 break;
199 case STATUS_GET_DATE:
200 processGetDate( line, null );
201 break;
202 case STATUS_GET_COMMENT:
203 processGetComment( line );
204 break;
205 case STATUS_GET_FILE:
206 processGetFile( line );
207 break;
208 case STATUS_RAW_TREE:
209 processGetRawTree( line );
210 break;
211 case STATUS_RAW_PARENT:
212 processGetRawParent( line );
213 break;
214 case STATUS_RAW_AUTHOR:
215 processGetRawAuthor( line );
216 break;
217 case STATUS_RAW_COMMITTER:
218 processGetRawCommitter( line );
219 break;
220 default:
221 throw new IllegalStateException( "Unknown state: " + status );
222 }
223 }
224
225 // ----------------------------------------------------------------------
226 //
227 // ----------------------------------------------------------------------
228
229 /**
230 * Process the current input line in the GET_HEADER state. The
231 * author, date, and the revision of the entry are gathered. Note,
232 * Git does not have per-file revisions, instead, the entire
233 * branch is given a single revision number, which is also used for
234 * the revision number of each file.
235 *
236 * @param line A line of text from the git log output
237 */
238 private void processGetHeader( String line )
239 {
240 Matcher matcher = HEADER_PATTERN.matcher( line );
241 if ( !matcher.matches() )
242 {
243 return;
244 }
245
246 currentRevision = matcher.group( 1 );
247
248 currentChange = new ChangeSet();
249
250 currentChange.setRevision( currentRevision );
251
252 status = STATUS_GET_AUTHOR;
253 }
254
255 /**
256 * Process the current input line in the STATUS_GET_AUTHOR state. This
257 * state gathers all of the author information that are part of a log entry.
258 *
259 * @param line a line of text from the git log output
260 */
261 private void processGetAuthor( String line )
262 {
263 // this autodetects 'raw' format
264 if ( RAW_TREE_PATTERN.matcher( line ).matches() )
265 {
266 status = STATUS_RAW_TREE;
267 processGetRawTree( line );
268 return;
269 }
270
271 Matcher matcher = AUTHOR_PATTERN.matcher( line );
272 if ( !matcher.matches() )
273 {
274 return;
275 }
276 String author = matcher.group( 1 );
277
278 currentChange.setAuthor( author );
279
280 status = STATUS_GET_DATE;
281 }
282
283 /**
284 * Process the current input line in the STATUS_RAW_TREE state. This
285 * state gathers tree hash part of a log entry.
286 *
287 * @param line a line of text from the git log output
288 */
289 private void processGetRawTree( String line )
290 {
291 if ( !RAW_TREE_PATTERN.matcher( line ).matches() )
292 {
293 return;
294 }
295 //here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
296 status = STATUS_RAW_PARENT;
297 }
298
299 /**
300 * Process the current input line in the STATUS_RAW_PARENT state. This
301 * state gathers parent revisions of a log entry.
302 *
303 * @param line a line of text from the git log output
304 */
305 private void processGetRawParent( String line )
306 {
307 Matcher matcher = RAW_PARENT_PATTERN.matcher( line );
308 if ( !matcher.matches() )
309 {
310 status = STATUS_RAW_AUTHOR;
311 processGetRawAuthor( line );
312 return;
313 }
314 String parentHash = matcher.group( 1 );
315
316 addParentRevision( parentHash );
317 }
318
319 /**
320 * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
321 * log. This method takes care of the difference.
322 *
323 * @param hash -
324 */
325 private void addParentRevision( String hash )
326 {
327 if ( currentChange.getParentRevision() == null )
328 {
329 currentChange.setParentRevision( hash );
330 }
331 else
332 {
333 currentChange.addMergedRevision( hash );
334 }
335 }
336
337 /**
338 * Process the current input line in the STATUS_RAW_AUTHOR state. This
339 * state gathers all the author information of a log entry.
340 *
341 * @param line a line of text from the git log output
342 */
343 private void processGetRawAuthor( String line )
344 {
345 Matcher matcher = RAW_AUTHOR_PATTERN.matcher( line );
346 if ( !matcher.matches() )
347 {
348 return;
349 }
350 String author = matcher.group( 1 );
351 currentChange.setAuthor( author );
352
353 String datestring = matcher.group( 2 );
354 String tz = matcher.group( 3 );
355
356 // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
357 // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
358 Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
359 c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
360 currentChange.setDate( c.getTime() );
361
362 status = STATUS_RAW_COMMITTER;
363 }
364
365 /**
366 * Process the current input line in the STATUS_RAW_AUTHOR state. This
367 * state gathers all the committer information of a log entry.
368 *
369 * @param line a line of text from the git log output
370 */
371 private void processGetRawCommitter( String line )
372 {
373 if ( !RAW_COMMITTER_PATTERN.matcher( line ).matches() )
374 {
375 return;
376 }
377 // here we could set committer and committerDate, the same way as in processGetRawAuthor
378 status = STATUS_GET_COMMENT;
379 }
380
381 /**
382 * Process the current input line in the STATUS_GET_DATE state. This
383 * state gathers all of the date information that are part of a log entry.
384 *
385 * @param line a line of text from the git log output
386 */
387 private void processGetDate( String line, Locale locale )
388 {
389 Matcher matcher = DATE_PATTERN.matcher( line );
390 if ( !matcher.matches() )
391 {
392 return;
393 }
394
395 String datestring = matcher.group( 1 );
396
397 Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
398
399 currentChange.setDate( date );
400
401 status = STATUS_GET_COMMENT;
402 }
403
404 /**
405 * Process the current input line in the GET_COMMENT state. This
406 * state gathers all of the comments that are part of a log entry.
407 *
408 * @param line a line of text from the git log output
409 */
410 private void processGetComment( String line )
411 {
412 if ( line.length() < 4 )
413 {
414 if ( currentComment == null )
415 {
416 currentComment = new StringBuilder();
417 }
418 else
419 {
420 currentChange.setComment( currentComment.toString() );
421 status = STATUS_GET_FILE;
422 }
423 }
424 else
425 {
426 if ( currentComment.length() > 0 )
427 {
428 currentComment.append( '\n' );
429 }
430
431 currentComment.append( line.substring( 4 ) );
432 }
433 }
434
435 /**
436 * Process the current input line in the GET_FILE state. This state
437 * adds each file entry line to the current change log entry. Note,
438 * the revision number for the entire entry is used for the revision
439 * number of each file.
440 *
441 * @param line A line of text from the git log output
442 */
443 private void processGetFile( String line )
444 {
445 if ( line.length() == 0 )
446 {
447 if ( currentChange != null )
448 {
449 entries.add( currentChange );
450 }
451
452 resetChangeLog();
453
454 status = STATUS_GET_HEADER;
455 }
456 else
457 {
458 Matcher matcher = FILE_PATTERN.matcher( line );
459 if ( !matcher.matches() )
460 {
461 return;
462 }
463 final String actionChar = matcher.group( 1 );
464 // action is currently not used
465 final ScmFileStatus action;
466 String name = matcher.group( 2 );
467 String originalName = null;
468 String originalRevision = null;
469 if ( "A".equals( actionChar ) )
470 {
471 action = ScmFileStatus.ADDED;
472 }
473 else if ( "M".equals( actionChar ) )
474 {
475 action = ScmFileStatus.MODIFIED;
476 }
477 else if ( "D".equals( actionChar ) )
478 {
479 action = ScmFileStatus.DELETED;
480 }
481 else if ( "R".equals( actionChar ) )
482 {
483 action = ScmFileStatus.RENAMED;
484 originalName = name;
485 name = matcher.group( 4 );
486 originalRevision = currentChange.getParentRevision();
487 }
488 else if ( "C".equals( actionChar ) )
489 {
490 action = ScmFileStatus.COPIED;
491 originalName = name;
492 name = matcher.group( 4 );
493 originalRevision = currentChange.getParentRevision();
494 }
495 else
496 {
497 action = ScmFileStatus.UNKNOWN;
498 }
499
500 final ChangeFile changeFile = new ChangeFile( name, currentRevision );
501 changeFile.setAction( action );
502 changeFile.setOriginalName( originalName );
503 changeFile.setOriginalRevision( originalRevision );
504 currentChange.addFile( changeFile );
505 }
506 }
507
508 private void resetChangeLog()
509 {
510 currentComment = null;
511 currentChange = null;
512 }
513 }