001 package org.apache.maven.scm.provider.git.gitexe.command.changelog;
002
003 /*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements. See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership. The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License. You may obtain a copy of the License at
011 *
012 * http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied. See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022 import org.apache.maven.scm.ChangeFile;
023 import org.apache.maven.scm.ChangeSet;
024 import org.apache.maven.scm.ScmFileStatus;
025 import org.apache.maven.scm.log.ScmLogger;
026 import org.apache.maven.scm.util.AbstractConsumer;
027 import org.apache.regexp.RE;
028 import org.apache.regexp.RESyntaxException;
029
030 import java.util.ArrayList;
031 import java.util.Calendar;
032 import java.util.Date;
033 import java.util.List;
034 import java.util.Locale;
035 import java.util.TimeZone;
036
037 /**
038 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
039 * @author Olivier Lamy
040 *
041 */
042 public class GitChangeLogConsumer
043 extends AbstractConsumer
044 {
045 /**
046 * Date formatter for git timestamp
047 * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
048 */
049 private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
050
051 /**
052 * State machine constant: expecting header
053 */
054 private static final int STATUS_GET_HEADER = 1;
055
056 /**
057 * State machine constant: expecting author information
058 */
059 private static final int STATUS_GET_AUTHOR = 2;
060
061 /**
062 * State machine constant: expecting parent hash information
063 */
064 private static final int STATUS_RAW_TREE = 21;
065
066 /**
067 * State machine constant: expecting parent hash information
068 */
069 private static final int STATUS_RAW_PARENT = 22;
070
071 /**
072 * State machine constant: expecting author name, email and timestamp information
073 */
074 private static final int STATUS_RAW_AUTHOR = 23;
075
076 /**
077 * State machine constant: expecting committer name, email and timestamp information
078 */
079 private static final int STATUS_RAW_COMMITTER = 24;
080
081 /**
082 * State machine constant: expecting date information
083 */
084 private static final int STATUS_GET_DATE = 3;
085
086 /**
087 * State machine constant: expecting file information
088 */
089 private static final int STATUS_GET_FILE = 4;
090
091 /**
092 * State machine constant: expecting comments
093 */
094 private static final int STATUS_GET_COMMENT = 5;
095
096 /**
097 * The pattern used to match git header lines
098 */
099 private static final String HEADER_PATTERN = "^commit (.*)";
100
101 /**
102 * The pattern used to match git author lines
103 */
104 private static final String AUTHOR_PATTERN = "^Author: (.*)";
105
106 /**
107 * The pattern used to match git tree hash lines (raw mode)
108 */
109 private static final String RAW_TREE_PATTERN = "^tree ([:xdigit:]+)";
110
111 /**
112 * The pattern used to match git parent hash lines (raw mode)
113 */
114 private static final String RAW_PARENT_PATTERN = "^parent ([:xdigit:]+)";
115
116 /**
117 * The pattern used to match git author lines (raw mode)
118 */
119 private static final String RAW_AUTHOR_PATTERN = "^author (.+ <.+>) ([:digit:]+) (.*)";
120
121 /**
122 * The pattern used to match git author lines (raw mode)
123 */
124 private static final String RAW_COMMITTER_PATTERN = "^committer (.+ <.+>) ([:digit:]+) (.*)";
125
126 /**
127 * The pattern used to match git date lines
128 */
129 private static final String DATE_PATTERN = "^Date:\\s*(.*)";
130
131 /**
132 * The pattern used to match git file lines
133 */
134 private static final String FILE_PATTERN =
135 "^:\\d* \\d* [:xdigit:]*\\.* [:xdigit:]*\\.* ([:upper:])[:digit:]*\\t([^\\t]*)(\\t(.*))?";
136
137 /**
138 * Current status of the parser
139 */
140 private int status = STATUS_GET_HEADER;
141
142 /**
143 * List of change log entries
144 */
145 private List<ChangeSet> entries = new ArrayList<ChangeSet>();
146
147 /**
148 * The current log entry being processed by the parser
149 */
150 private ChangeSet currentChange;
151
152 /**
153 * The current revision of the entry being processed by the parser
154 */
155 private String currentRevision;
156
157 /**
158 * The current comment of the entry being processed by the parser
159 */
160 private StringBuilder currentComment;
161
162 /**
163 * The regular expression used to match header lines
164 */
165 private RE headerRegexp;
166
167 /**
168 * The regular expression used to match author lines
169 */
170 private RE authorRegexp;
171
172 /**
173 * The regular expression used to match tree hash lines in raw mode
174 */
175 private RE rawTreeRegexp;
176
177 /**
178 * The regular expression used to match parent hash lines in raw mode
179 */
180 private RE rawParentRegexp;
181
182 /**
183 * The regular expression used to match author lines in raw mode
184 */
185 private RE rawAuthorRegexp;
186
187 /**
188 * The regular expression used to match committer lines in raw mode
189 */
190 private RE rawCommitterRegexp;
191
192 /**
193 * The regular expression used to match date lines
194 */
195 private RE dateRegexp;
196
197 /**
198 * The regular expression used to match file lines
199 */
200 private RE fileRegexp;
201
202 private String userDateFormat;
203
204 /**
205 * Default constructor.
206 */
207 public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
208 {
209 super( logger );
210
211 this.userDateFormat = userDateFormat;
212
213 try
214 {
215 headerRegexp = new RE( HEADER_PATTERN );
216 authorRegexp = new RE( AUTHOR_PATTERN );
217 dateRegexp = new RE( DATE_PATTERN );
218 fileRegexp = new RE( FILE_PATTERN );
219 rawTreeRegexp = new RE( RAW_TREE_PATTERN );
220 rawParentRegexp = new RE( RAW_PARENT_PATTERN );
221 rawAuthorRegexp = new RE( RAW_AUTHOR_PATTERN );
222 rawCommitterRegexp = new RE( RAW_COMMITTER_PATTERN );
223 }
224 catch ( RESyntaxException ex )
225 {
226 throw new RuntimeException(
227 "INTERNAL ERROR: Could not create regexp to parse git log file. This shouldn't happen. Something is probably wrong with the oro installation.",
228 ex );
229 }
230 }
231
232 public List<ChangeSet> getModifications()
233 {
234 // this is needed since the processFile does not always get a the end-sequence correctly.
235 processGetFile( "" );
236
237 return entries;
238 }
239
240 // ----------------------------------------------------------------------
241 // StreamConsumer Implementation
242 // ----------------------------------------------------------------------
243
244 /**
245 * {@inheritDoc}
246 */
247 public void consumeLine( String line )
248 {
249 switch ( status )
250 {
251 case STATUS_GET_HEADER:
252 processGetHeader( line );
253 break;
254 case STATUS_GET_AUTHOR:
255 processGetAuthor( line );
256 break;
257 case STATUS_GET_DATE:
258 processGetDate( line, null );
259 break;
260 case STATUS_GET_COMMENT:
261 processGetComment( line );
262 break;
263 case STATUS_GET_FILE:
264 processGetFile( line );
265 break;
266 case STATUS_RAW_TREE:
267 processGetRawTree( line );
268 break;
269 case STATUS_RAW_PARENT:
270 processGetRawParent( line );
271 break;
272 case STATUS_RAW_AUTHOR:
273 processGetRawAuthor( line );
274 break;
275 case STATUS_RAW_COMMITTER:
276 processGetRawCommitter( line );
277 break;
278 default:
279 throw new IllegalStateException( "Unknown state: " + status );
280 }
281 }
282
283 // ----------------------------------------------------------------------
284 //
285 // ----------------------------------------------------------------------
286
287 /**
288 * Process the current input line in the GET_HEADER state. The
289 * author, date, and the revision of the entry are gathered. Note,
290 * Git does not have per-file revisions, instead, the entire
291 * branch is given a single revision number, which is also used for
292 * the revision number of each file.
293 *
294 * @param line A line of text from the git log output
295 */
296 private void processGetHeader( String line )
297 {
298 if ( !headerRegexp.match( line ) )
299 {
300 return;
301 }
302
303 currentRevision = headerRegexp.getParen( 1 );
304
305 currentChange = new ChangeSet();
306
307 currentChange.setRevision( currentRevision );
308
309 status = STATUS_GET_AUTHOR;
310 }
311
312 /**
313 * Process the current input line in the STATUS_GET_AUTHOR state. This
314 * state gathers all of the author information that are part of a log entry.
315 *
316 * @param line a line of text from the git log output
317 */
318 private void processGetAuthor( String line )
319 {
320 // this autodetects 'raw' format
321 if ( rawTreeRegexp.match( line ) )
322 {
323 status = STATUS_RAW_TREE;
324 processGetRawTree( line );
325 return;
326 }
327
328 if ( !authorRegexp.match( line ) )
329 {
330 return;
331 }
332 String author = authorRegexp.getParen( 1 );
333
334 currentChange.setAuthor( author );
335
336 status = STATUS_GET_DATE;
337 }
338
339 /**
340 * Process the current input line in the STATUS_RAW_TREE state. This
341 * state gathers tree hash part of a log entry.
342 *
343 * @param line a line of text from the git log output
344 */
345 private void processGetRawTree( String line )
346 {
347 if ( !rawTreeRegexp.match( line ) )
348 {
349 return;
350 }
351 //here we could set treeHash if it appears in the model: currentChange.setTreeHash( rawTreeRegexp.getParen( 1 ) );
352 status = STATUS_RAW_PARENT;
353 }
354
355 /**
356 * Process the current input line in the STATUS_RAW_PARENT state. This
357 * state gathers parent revisions of a log entry.
358 *
359 * @param line a line of text from the git log output
360 */
361 private void processGetRawParent( String line )
362 {
363 if ( !rawParentRegexp.match( line ) )
364 {
365 status = STATUS_RAW_AUTHOR;
366 processGetRawAuthor( line );
367 return;
368 }
369 String parentHash = rawParentRegexp.getParen( 1 );
370
371 addParentRevision( parentHash );
372 }
373
374 /**
375 * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the log.
376 * This method takes care of the difference.
377 *
378 * @param hash -
379 */
380 private void addParentRevision( String hash )
381 {
382 if ( currentChange.getParentRevision() == null )
383 {
384 currentChange.setParentRevision( hash );
385 }
386 else
387 {
388 currentChange.addMergedRevision( hash );
389 }
390 }
391
392 /**
393 * Process the current input line in the STATUS_RAW_AUTHOR state. This
394 * state gathers all the author information of a log entry.
395 *
396 * @param line a line of text from the git log output
397 */
398 private void processGetRawAuthor( String line )
399 {
400 if ( !rawAuthorRegexp.match( line ) )
401 {
402 return;
403 }
404 String author = rawAuthorRegexp.getParen( 1 );
405 currentChange.setAuthor( author );
406
407 String datestring = rawAuthorRegexp.getParen( 2 );
408 String tz = rawAuthorRegexp.getParen( 3 );
409
410 // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
411 // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
412 Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
413 c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
414 currentChange.setDate( c.getTime() );
415
416 status = STATUS_RAW_COMMITTER;
417 }
418
419 /**
420 * Process the current input line in the STATUS_RAW_AUTHOR state. This
421 * state gathers all the committer information of a log entry.
422 *
423 * @param line a line of text from the git log output
424 */
425 private void processGetRawCommitter( String line )
426 {
427 if ( !rawCommitterRegexp.match( line ) )
428 {
429 return;
430 }
431 // here we could set committer and committerDate, the same way as in processGetRawAuthor
432 status = STATUS_GET_COMMENT;
433 }
434
435 /**
436 * Process the current input line in the STATUS_GET_DATE state. This
437 * state gathers all of the date information that are part of a log entry.
438 *
439 * @param line a line of text from the git log output
440 */
441 private void processGetDate( String line, Locale locale )
442 {
443 if ( !dateRegexp.match( line ) )
444 {
445 return;
446 }
447
448 String datestring = dateRegexp.getParen( 1 );
449
450 Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
451
452 currentChange.setDate( date );
453
454 status = STATUS_GET_COMMENT;
455 }
456
457 /**
458 * Process the current input line in the GET_COMMENT state. This
459 * state gathers all of the comments that are part of a log entry.
460 *
461 * @param line a line of text from the git log output
462 */
463 private void processGetComment( String line )
464 {
465 if ( line.length() < 4 )
466 {
467 if ( currentComment == null )
468 {
469 currentComment = new StringBuilder();
470 }
471 else
472 {
473 currentChange.setComment( currentComment.toString() );
474 status = STATUS_GET_FILE;
475 }
476 }
477 else
478 {
479 if ( currentComment.length() > 0 )
480 {
481 currentComment.append( '\n' );
482 }
483
484 currentComment.append( line.substring( 4 ) );
485 }
486 }
487
488 /**
489 * Process the current input line in the GET_FILE state. This state
490 * adds each file entry line to the current change log entry. Note,
491 * the revision number for the entire entry is used for the revision
492 * number of each file.
493 *
494 * @param line A line of text from the git log output
495 */
496 private void processGetFile( String line )
497 {
498 if ( line.length() == 0 )
499 {
500 if ( currentChange != null )
501 {
502 entries.add( currentChange );
503 }
504
505 resetChangeLog();
506
507 status = STATUS_GET_HEADER;
508 }
509 else
510 {
511 if ( !fileRegexp.match( line ) )
512 {
513 return;
514 }
515 final String actionChar = fileRegexp.getParen( 1 );
516 // action is currently not used
517 final ScmFileStatus action;
518 String name = fileRegexp.getParen( 2 );
519 String originalName = null;
520 String originalRevision = null;
521 if ( "A".equals( actionChar ) )
522 {
523 action = ScmFileStatus.ADDED;
524 }
525 else if ( "M".equals( actionChar ) )
526 {
527 action = ScmFileStatus.MODIFIED;
528 }
529 else if ( "D".equals( actionChar ) )
530 {
531 action = ScmFileStatus.DELETED;
532 }
533 else if ( "R".equals( actionChar ) )
534 {
535 action = ScmFileStatus.RENAMED;
536 originalName = name;
537 name = fileRegexp.getParen( 4 );
538 originalRevision = currentChange.getParentRevision();
539 }
540 else if ( "C".equals( actionChar ) )
541 {
542 action = ScmFileStatus.COPIED;
543 originalName = name;
544 name = fileRegexp.getParen( 4 );
545 originalRevision = currentChange.getParentRevision();
546 }
547 else
548 {
549 action = ScmFileStatus.UNKNOWN;
550 }
551
552 final ChangeFile changeFile = new ChangeFile( name, currentRevision );
553 changeFile.setAction( action );
554 changeFile.setOriginalName( originalName );
555 changeFile.setOriginalRevision( originalRevision );
556 currentChange.addFile( changeFile );
557 }
558 }
559
560 private void resetChangeLog()
561 {
562 currentComment = null;
563 currentChange = null;
564 }
565 }