package org.apache.maven.scm.provider.git.gitexe.command.changelog;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.maven.scm.ChangeFile;
import org.apache.maven.scm.ChangeSet;
import org.apache.maven.scm.ScmFileStatus;
import org.apache.maven.scm.log.ScmLogger;
import org.apache.maven.scm.util.AbstractConsumer;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Consumes the output of the git change log line by line and turns it into a list of {@link ChangeSet}s.
 * <p>
 * The parser is a small state machine: each consumed line is dispatched to the handler of the current state, and
 * a handler moves to the next state once it has recognised the line it was waiting for. Lines that a handler does
 * not recognise are skipped. Both the regular output (<code>Author:</code> / <code>Date:</code> lines) and the
 * <code>raw</code> output (<code>tree</code> / <code>parent</code> / <code>author</code> / <code>committer</code>
 * lines) are understood; the raw variant is detected automatically from the line following the commit header.
 * </p>
 * Instances keep parsing state between calls and are therefore meant to be used for a single log stream.
 *
 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
 * @author Olivier Lamy
 *
 */
public class GitChangeLogConsumer
    extends AbstractConsumer
{
    /**
     * Date formatter for git timestamp
     * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
     */
    private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";

    /**
     * State machine constant: expecting header
     */
    private static final int STATUS_GET_HEADER = 1;

    /**
     * State machine constant: expecting author information
     */
    private static final int STATUS_GET_AUTHOR = 2;

    /**
     * State machine constant: expecting tree hash information (raw mode)
     */
    private static final int STATUS_RAW_TREE = 21;

    /**
     * State machine constant: expecting parent hash information (raw mode)
     */
    private static final int STATUS_RAW_PARENT = 22;

    /**
     * State machine constant: expecting author name, email and timestamp information (raw mode)
     */
    private static final int STATUS_RAW_AUTHOR = 23;

    /**
     * State machine constant: expecting committer name, email and timestamp information (raw mode)
     */
    private static final int STATUS_RAW_COMMITTER = 24;

    /**
     * State machine constant: expecting date information
     */
    private static final int STATUS_GET_DATE = 3;

    /**
     * State machine constant: expecting file information
     */
    private static final int STATUS_GET_FILE = 4;

    /**
     * State machine constant: expecting comments
     */
    private static final int STATUS_GET_COMMENT = 5;

    /**
     * The pattern used to match git header lines
     */
    private static final Pattern HEADER_PATTERN = Pattern.compile( "^commit (.*)" );

    /**
     * The pattern used to match git author lines
     */
    private static final Pattern AUTHOR_PATTERN = Pattern.compile( "^Author: (.*)" );

    /**
     * The pattern used to match git tree hash lines (raw mode)
     */
    private static final Pattern RAW_TREE_PATTERN = Pattern.compile( "^tree ([A-Fa-f0-9]+)" );

    /**
     * The pattern used to match git parent hash lines (raw mode)
     */
    private static final Pattern RAW_PARENT_PATTERN = Pattern.compile( "^parent ([A-Fa-f0-9]+)" );

    /**
     * The pattern used to match git author lines (raw mode): identity, epoch seconds, time zone
     */
    private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile( "^author (.+ <.+>) ([0-9]+) (.*)" );

    /**
     * The pattern used to match git committer lines (raw mode): identity, epoch seconds, time zone
     */
    private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile( "^committer (.+ <.+>) ([0-9]+) (.*)" );

    /**
     * The pattern used to match git date lines
     */
    private static final Pattern DATE_PATTERN = Pattern.compile( "^Date:\\s*(.*)" );

    /**
     * The pattern used to match git file lines. Group 1 is the action letter, group 2 the first path and
     * group 4 the optional second path (present for renames and copies).
     */
    private static final Pattern FILE_PATTERN =
        Pattern.compile( "^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?" );

    /**
     * Current status of the parser
     */
    private int status = STATUS_GET_HEADER;

    /**
     * List of change log entries
     */
    private final List<ChangeSet> entries = new ArrayList<ChangeSet>();

    /**
     * The current log entry being processed by the parser
     */
    private ChangeSet currentChange;

    /**
     * The current revision of the entry being processed by the parser
     */
    private String currentRevision;

    /**
     * The current comment of the entry being processed by the parser; <code>null</code> until the blank line
     * that precedes the commit message has been seen
     */
    private StringBuilder currentComment;

    /**
     * Date format handed to <code>parseDate</code> ahead of {@link #GIT_TIMESTAMP_PATTERN}
     */
    private final String userDateFormat;

    /**
     * Creates a consumer for one change log stream.
     *
     * @param logger         logger passed on to {@link AbstractConsumer}
     * @param userDateFormat date format passed to <code>parseDate</code> when reading <code>Date:</code> lines
     */
    public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
    {
        super( logger );

        this.userDateFormat = userDateFormat;
    }

    /**
     * Finishes the entry that is still being parsed, if any, and returns everything collected so far.
     *
     * @return the internal list of parsed change sets (not a copy)
     */
    public List<ChangeSet> getModifications()
    {
        // The stream may end while the message of the last commit is still being collected (no file lines and
        // no trailing blank line). Store the message before closing the entry so it is not lost.
        if ( status == STATUS_GET_COMMENT && currentChange != null && currentComment != null )
        {
            currentChange.setComment( currentComment.toString() );
        }

        // The blank line that normally terminates the last entry is not always delivered, so simulate it.
        processGetFile( "" );

        return entries;
    }

    // ----------------------------------------------------------------------
    // StreamConsumer Implementation
    // ----------------------------------------------------------------------

    /**
     * {@inheritDoc}
     */
    public void consumeLine( String line )
    {
        switch ( status )
        {
            case STATUS_GET_HEADER:
                processGetHeader( line );
                break;
            case STATUS_GET_AUTHOR:
                processGetAuthor( line );
                break;
            case STATUS_GET_DATE:
                processGetDate( line, null );
                break;
            case STATUS_GET_COMMENT:
                processGetComment( line );
                break;
            case STATUS_GET_FILE:
                processGetFile( line );
                break;
            case STATUS_RAW_TREE:
                processGetRawTree( line );
                break;
            case STATUS_RAW_PARENT:
                processGetRawParent( line );
                break;
            case STATUS_RAW_AUTHOR:
                processGetRawAuthor( line );
                break;
            case STATUS_RAW_COMMITTER:
                processGetRawCommitter( line );
                break;
            default:
                throw new IllegalStateException( "Unknown state: " + status );
        }
    }

    // ----------------------------------------------------------------------
    //
    // ----------------------------------------------------------------------

    /**
     * Process the current input line in the GET_HEADER state. A new entry is started and its revision is
     * taken from the header line. Note, Git does not have per-file revisions, instead, the entire
     * branch is given a single revision number, which is also used for
     * the revision number of each file.
     *
     * @param line A line of text from the git log output
     */
    private void processGetHeader( String line )
    {
        Matcher matcher = HEADER_PATTERN.matcher( line );
        if ( !matcher.matches() )
        {
            return;
        }

        currentRevision = matcher.group( 1 );

        currentChange = new ChangeSet();

        currentChange.setRevision( currentRevision );

        status = STATUS_GET_AUTHOR;
    }

    /**
     * Process the current input line in the STATUS_GET_AUTHOR state. This
     * state gathers all of the author information that are part of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetAuthor( String line )
    {
        // this autodetects 'raw' format
        if ( RAW_TREE_PATTERN.matcher( line ).matches() )
        {
            status = STATUS_RAW_TREE;
            processGetRawTree( line );
            return;
        }

        Matcher matcher = AUTHOR_PATTERN.matcher( line );
        if ( !matcher.matches() )
        {
            return;
        }
        String author = matcher.group( 1 );

        currentChange.setAuthor( author );

        status = STATUS_GET_DATE;
    }

    /**
     * Process the current input line in the STATUS_RAW_TREE state. This
     * state gathers tree hash part of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawTree( String line )
    {
        if ( !RAW_TREE_PATTERN.matcher( line ).matches() )
        {
            return;
        }
        //here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
        status = STATUS_RAW_PARENT;
    }

    /**
     * Process the current input line in the STATUS_RAW_PARENT state. This
     * state gathers parent revisions of a log entry. The first line that is not a parent line is handed
     * straight to the raw author state, since the number of parent lines is not known in advance.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawParent( String line )
    {
        Matcher matcher = RAW_PARENT_PATTERN.matcher( line );
        if ( !matcher.matches() )
        {
            status = STATUS_RAW_AUTHOR;
            processGetRawAuthor( line );
            return;
        }
        String parentHash = matcher.group( 1 );

        addParentRevision( parentHash );
    }

    /**
     * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
     * log. This method takes care of the difference.
     *
     * @param hash hash from a <code>parent</code> line; the first one becomes the parent revision, any later
     *             one is recorded as a merged revision
     */
    private void addParentRevision( String hash )
    {
        if ( currentChange.getParentRevision() == null )
        {
            currentChange.setParentRevision( hash );
        }
        else
        {
            currentChange.addMergedRevision( hash );
        }
    }

    /**
     * Process the current input line in the STATUS_RAW_AUTHOR state. This
     * state gathers all the author information of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawAuthor( String line )
    {
        Matcher matcher = RAW_AUTHOR_PATTERN.matcher( line );
        if ( !matcher.matches() )
        {
            return;
        }
        String author = matcher.group( 1 );
        currentChange.setAuthor( author );

        String datestring = matcher.group( 2 );
        String tz = matcher.group( 3 );

        // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
        // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
        // NOTE: the resulting Date is an absolute instant, so the zone given to the Calendar does not alter it.
        Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
        c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
        currentChange.setDate( c.getTime() );

        status = STATUS_RAW_COMMITTER;
    }

    /**
     * Process the current input line in the STATUS_RAW_COMMITTER state. The committer line is only
     * recognised, its content is not stored.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawCommitter( String line )
    {
        if ( !RAW_COMMITTER_PATTERN.matcher( line ).matches() )
        {
            return;
        }
        // here we could set committer and committerDate, the same way as in processGetRawAuthor
        status = STATUS_GET_COMMENT;
    }

    /**
     * Process the current input line in the STATUS_GET_DATE state. This
     * state gathers all of the date information that are part of a log entry.
     *
     * @param line   a line of text from the git log output
     * @param locale locale passed on to <code>parseDate</code>; may be <code>null</code>
     */
    private void processGetDate( String line, Locale locale )
    {
        Matcher matcher = DATE_PATTERN.matcher( line );
        if ( !matcher.matches() )
        {
            return;
        }

        String datestring = matcher.group( 1 );

        Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );

        currentChange.setDate( date );

        status = STATUS_GET_COMMENT;
    }

    /**
     * Process the current input line in the GET_COMMENT state. This
     * state gathers all of the comments that are part of a log entry.
     * <p>
     * A line shorter than four characters acts as a separator: the first one opens the message, the second
     * one closes it and switches to the file state. Message lines are stored without their first four
     * characters (the indentation of the log output).
     * </p>
     *
     * @param line a line of text from the git log output
     */
    private void processGetComment( String line )
    {
        if ( line.length() < 4 )
        {
            if ( currentComment == null )
            {
                currentComment = new StringBuilder();
            }
            else
            {
                currentChange.setComment( currentComment.toString() );
                status = STATUS_GET_FILE;
            }
            return;
        }

        if ( currentComment == null )
        {
            // The separator that opens the message has not been seen yet, so this is still a header line
            // (raw output may carry additional headers after the committer). Skip it instead of failing
            // with a NullPointerException.
            return;
        }

        if ( currentComment.length() > 0 )
        {
            currentComment.append( '\n' );
        }

        currentComment.append( line.substring( 4 ) );
    }

    /**
     * Process the current input line in the GET_FILE state. This state
     * adds each file entry line to the current change log entry. Note,
     * the revision number for the entire entry is used for the revision
     * number of each file.
     * <p>
     * An empty line ends the entry. A commit header seen in this state also ends the entry and immediately
     * starts the next one; this happens when a commit has no file lines, in which case its message is
     * directly followed by the next header.
     * </p>
     *
     * @param line A line of text from the git log output
     */
    private void processGetFile( String line )
    {
        if ( line.length() == 0 )
        {
            closeCurrentChange();
            return;
        }

        Matcher matcher = FILE_PATTERN.matcher( line );
        if ( !matcher.matches() )
        {
            if ( HEADER_PATTERN.matcher( line ).matches() )
            {
                // File lines always start with ':', so a header cannot be mistaken for one.
                closeCurrentChange();
                processGetHeader( line );
            }
            return;
        }

        final String actionChar = matcher.group( 1 );
        final ScmFileStatus action;
        String name = matcher.group( 2 );
        String originalName = null;
        String originalRevision = null;
        if ( "A".equals( actionChar ) )
        {
            action = ScmFileStatus.ADDED;
        }
        else if ( "M".equals( actionChar ) )
        {
            action = ScmFileStatus.MODIFIED;
        }
        else if ( "D".equals( actionChar ) )
        {
            action = ScmFileStatus.DELETED;
        }
        else if ( "R".equals( actionChar ) )
        {
            // for renames the first path is the old name and the second one the new name
            action = ScmFileStatus.RENAMED;
            originalName = name;
            name = matcher.group( 4 );
            originalRevision = currentChange.getParentRevision();
        }
        else if ( "C".equals( actionChar ) )
        {
            // for copies the first path is the source and the second one the copy
            action = ScmFileStatus.COPIED;
            originalName = name;
            name = matcher.group( 4 );
            originalRevision = currentChange.getParentRevision();
        }
        else
        {
            action = ScmFileStatus.UNKNOWN;
        }

        final ChangeFile changeFile = new ChangeFile( name, currentRevision );
        changeFile.setAction( action );
        changeFile.setOriginalName( originalName );
        changeFile.setOriginalRevision( originalRevision );
        currentChange.addFile( changeFile );
    }

    /**
     * Stores the entry being parsed (if there is one), clears the per-entry state and waits for the next header.
     */
    private void closeCurrentChange()
    {
        if ( currentChange != null )
        {
            entries.add( currentChange );
        }

        resetChangeLog();

        status = STATUS_GET_HEADER;
    }

    /**
     * Clears the per-entry parsing state.
     */
    private void resetChangeLog()
    {
        currentComment = null;
        currentChange = null;
    }
}