/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.maven.scm.provider.git.gitexe.command.changelog;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.maven.scm.ChangeFile;
import org.apache.maven.scm.ChangeSet;
import org.apache.maven.scm.ScmFileStatus;
import org.apache.maven.scm.util.AbstractConsumer;

/**
 * Line-by-line consumer of <code>git log</code> output that builds a list of {@link ChangeSet} entries.
 * <p>
 * The consumer is a small state machine: each consumed line is dispatched according to the current
 * state (header, author, date, comment, file list, or the corresponding "raw" format states) and the
 * handler moves the machine to the next state once it has recognised its line. Lines that a handler
 * does not recognise are skipped.
 *
 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
 * @author Olivier Lamy
 */
public class GitChangeLogConsumer extends AbstractConsumer {
    /**
     * Date formatter for git timestamp
     * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
     */
    private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";

    /**
     * State machine constant: expecting header
     */
    private static final int STATUS_GET_HEADER = 1;

    /**
     * State machine constant: expecting author information
     */
    private static final int STATUS_GET_AUTHOR = 2;

    /**
     * State machine constant: expecting tree hash information (raw mode)
     */
    private static final int STATUS_RAW_TREE = 21;

    /**
     * State machine constant: expecting parent hash information (raw mode)
     */
    private static final int STATUS_RAW_PARENT = 22;

    /**
     * State machine constant: expecting author name, email and timestamp information (raw mode)
     */
    private static final int STATUS_RAW_AUTHOR = 23;

    /**
     * State machine constant: expecting committer name, email and timestamp information (raw mode)
     */
    private static final int STATUS_RAW_COMMITTER = 24;

    /**
     * State machine constant: expecting date information
     */
    private static final int STATUS_GET_DATE = 3;

    /**
     * State machine constant: expecting file information
     */
    private static final int STATUS_GET_FILE = 4;

    /**
     * State machine constant: expecting comments
     */
    private static final int STATUS_GET_COMMENT = 5;

    /**
     * The pattern used to match git header lines; group 1 is the commit hash, the optional
     * group 2 is the parenthesised decoration list
     */
    private static final Pattern HEADER_PATTERN = Pattern.compile("^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$");

    /**
     * The pattern used to match git author lines
     */
    private static final Pattern AUTHOR_PATTERN = Pattern.compile("^Author: (.*)");

    /**
     * The pattern used to match git tree hash lines (raw mode)
     */
    private static final Pattern RAW_TREE_PATTERN = Pattern.compile("^tree ([A-Fa-f0-9]+)");

    /**
     * The pattern used to match git parent hash lines (raw mode)
     */
    private static final Pattern RAW_PARENT_PATTERN = Pattern.compile("^parent ([A-Fa-f0-9]+)");

    /**
     * The pattern used to match git author lines (raw mode)
     */
    private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile("^author (.+ <.+>) ([0-9]+) (.*)");

    /**
     * The pattern used to match git committer lines (raw mode)
     */
    private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile("^committer (.+ <.+>) ([0-9]+) (.*)");

    /**
     * The pattern used to match git date lines
     */
    private static final Pattern DATE_PATTERN = Pattern.compile("^Date:\\s*(.*)");

    /**
     * The pattern used to match git file lines; group 1 is the action letter, group 2 the first
     * path and the optional group 4 the second path
     */
    private static final Pattern FILE_PATTERN =
            Pattern.compile("^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?");

    /**
     * Current status of the parser
     */
    private int status = STATUS_GET_HEADER;

    /**
     * List of change log entries
     */
    private final List<ChangeSet> entries = new ArrayList<>();

    /**
     * The current log entry being processed by the parser
     */
    private ChangeSet currentChange;

    /**
     * The current revision of the entry being processed by the parser
     */
    private String currentRevision;

    /**
     * The current comment of the entry being processed by the parser; {@code null} until the
     * blank line that introduces the comment has been seen
     */
    private StringBuilder currentComment;

    /**
     * Date format passed to {@code parseDate} ahead of {@link #GIT_TIMESTAMP_PATTERN}
     */
    private final String userDateFormat;

    /**
     * Creates a consumer.
     *
     * @param userDateFormat date format handed to {@code parseDate} together with the git ISO
     *                       timestamp pattern when a <code>Date:</code> line is parsed
     */
    public GitChangeLogConsumer(String userDateFormat) {
        this.userDateFormat = userDateFormat;
    }

    /**
     * Completes the entry that is still being parsed (if any) and returns all collected entries.
     *
     * @return the list of change sets gathered so far
     */
    public List<ChangeSet> getModifications() {
        // The output may end while the last comment is still being collected (no terminating
        // blank line); store what has been gathered so the last entry does not lose its comment.
        if (status == STATUS_GET_COMMENT && currentChange != null && currentComment != null) {
            currentChange.setComment(currentComment.toString());
        }

        // this is needed since the end-sequence of the last entry is not always received.
        finishCurrentChange();

        return entries;
    }

    // ----------------------------------------------------------------------
    // StreamConsumer Implementation
    // ----------------------------------------------------------------------

    /**
     * {@inheritDoc}
     */
    @Override
    public void consumeLine(String line) {
        switch (status) {
            case STATUS_GET_HEADER:
                processGetHeader(line);
                break;
            case STATUS_GET_AUTHOR:
                processGetAuthor(line);
                break;
            case STATUS_GET_DATE:
                processGetDate(line, null);
                break;
            case STATUS_GET_COMMENT:
                processGetComment(line);
                break;
            case STATUS_GET_FILE:
                processGetFile(line);
                break;
            case STATUS_RAW_TREE:
                processGetRawTree(line);
                break;
            case STATUS_RAW_PARENT:
                processGetRawParent(line);
                break;
            case STATUS_RAW_AUTHOR:
                processGetRawAuthor(line);
                break;
            case STATUS_RAW_COMMITTER:
                processGetRawCommitter(line);
                break;
            default:
                throw new IllegalStateException("Unknown state: " + status);
        }
    }

    // ----------------------------------------------------------------------
    //
    // ----------------------------------------------------------------------

    /**
     * Process the current input line in the GET_HEADER state. A new entry is started and its
     * revision and tags are gathered. Note, Git does not have per-file revisions, instead, the
     * entire branch is given a single revision number, which is also used for
     * the revision number of each file.
     *
     * @param line A line of text from the git log output
     */
    private void processGetHeader(String line) {
        Matcher matcher = HEADER_PATTERN.matcher(line);
        if (!matcher.matches()) {
            return;
        }

        currentRevision = matcher.group(1);

        currentChange = new ChangeSet();
        currentChange.setRevision(currentRevision);

        // Extract the tags (if present): only decorations of the form "tag: <name>" are kept
        String tagList = matcher.group(2);
        if (tagList != null) {
            for (String rawTag : tagList.split(",")) {
                String[] tagParts = rawTag.trim().split(":");
                if (tagParts.length == 2 && "tag".equals(tagParts[0])) {
                    currentChange.addTag(tagParts[1].trim());
                }
            }
        }

        status = STATUS_GET_AUTHOR;
    }

    /**
     * Process the current input line in the STATUS_GET_AUTHOR state. This
     * state gathers all of the author information that are part of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetAuthor(String line) {
        // this autodetects 'raw' format
        if (RAW_TREE_PATTERN.matcher(line).matches()) {
            status = STATUS_RAW_TREE;
            processGetRawTree(line);
            return;
        }

        Matcher matcher = AUTHOR_PATTERN.matcher(line);
        if (!matcher.matches()) {
            return;
        }

        currentChange.setAuthor(matcher.group(1));

        status = STATUS_GET_DATE;
    }

    /**
     * Process the current input line in the STATUS_RAW_TREE state. This
     * state recognises the tree hash part of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawTree(String line) {
        if (!RAW_TREE_PATTERN.matcher(line).matches()) {
            return;
        }
        // here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
        status = STATUS_RAW_PARENT;
    }

    /**
     * Process the current input line in the STATUS_RAW_PARENT state. This
     * state gathers parent revisions of a log entry. The first line that is not a parent line
     * is handed over to the raw author state.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawParent(String line) {
        Matcher matcher = RAW_PARENT_PATTERN.matcher(line);
        if (!matcher.matches()) {
            status = STATUS_RAW_AUTHOR;
            processGetRawAuthor(line);
            return;
        }

        addParentRevision(matcher.group(1));
    }

    /**
     * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
     * log. This method takes care of the difference.
     *
     * @param hash the hash taken from a parent line
     */
    private void addParentRevision(String hash) {
        if (currentChange.getParentRevision() == null) {
            currentChange.setParentRevision(hash);
        } else {
            currentChange.addMergedRevision(hash);
        }
    }

    /**
     * Process the current input line in the STATUS_RAW_AUTHOR state. This
     * state gathers all the author information of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawAuthor(String line) {
        Matcher matcher = RAW_AUTHOR_PATTERN.matcher(line);
        if (!matcher.matches()) {
            return;
        }
        currentChange.setAuthor(matcher.group(1));

        String datestring = matcher.group(2);
        String tz = matcher.group(3);

        // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
        // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
        // Note: the instant is set from the epoch value, so the calendar's zone does not change the resulting Date.
        Calendar c = Calendar.getInstance(TimeZone.getTimeZone(tz));
        c.setTimeInMillis(Long.parseLong(datestring) * 1000);
        currentChange.setDate(c.getTime());

        status = STATUS_RAW_COMMITTER;
    }

    /**
     * Process the current input line in the STATUS_RAW_COMMITTER state. This
     * state recognises the committer information of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawCommitter(String line) {
        if (!RAW_COMMITTER_PATTERN.matcher(line).matches()) {
            return;
        }
        // here we could set committer and committerDate, the same way as in processGetRawAuthor
        status = STATUS_GET_COMMENT;
    }

    /**
     * Process the current input line in the STATUS_GET_DATE state. This
     * state gathers all of the date information that are part of a log entry.
     *
     * @param line   a line of text from the git log output
     * @param locale locale passed through to {@code parseDate}; may be {@code null}
     */
    private void processGetDate(String line, Locale locale) {
        Matcher matcher = DATE_PATTERN.matcher(line);
        if (!matcher.matches()) {
            return;
        }

        String datestring = matcher.group(1);

        Date date = parseDate(datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale);

        currentChange.setDate(date);

        status = STATUS_GET_COMMENT;
    }

    /**
     * Process the current input line in the GET_COMMENT state. This
     * state gathers all of the comments that are part of a log entry.
     * <p>
     * A line shorter than four characters acts as separator: the first one opens the comment,
     * the next one closes it and switches to the GET_FILE state. Every other line has its first
     * four characters (the indentation) removed and is appended to the comment.
     *
     * @param line a line of text from the git log output
     */
    private void processGetComment(String line) {
        if (line.length() < 4) {
            if (currentComment == null) {
                currentComment = new StringBuilder();
            } else {
                currentChange.setComment(currentComment.toString());
                status = STATUS_GET_FILE;
            }
            return;
        }

        if (currentComment == null) {
            // The separator that opens the comment has not been seen yet, so this is not a
            // comment line (presumably an additional header line); skip it instead of failing
            // with a NullPointerException.
            return;
        }

        if (currentComment.length() > 0) {
            currentComment.append('\n');
        }

        currentComment.append(line.substring(4));
    }

    /**
     * Process the current input line in the GET_FILE state. This state
     * adds each file entry line to the current change log entry. Note,
     * the revision number for the entire entry is used for the revision
     * number of each file.
     * <p>
     * An empty line completes the current entry. A commit header line also completes it and
     * immediately starts the next entry, so that an entry without any file line does not swallow
     * the entry that follows it.
     *
     * @param line A line of text from the git log output
     */
    private void processGetFile(String line) {
        if (line.length() == 0) {
            finishCurrentChange();
            return;
        }

        if (HEADER_PATTERN.matcher(line).matches()) {
            // No file lines were listed for the current entry and the next entry starts right away.
            finishCurrentChange();
            processGetHeader(line);
            return;
        }

        Matcher matcher = FILE_PATTERN.matcher(line);
        if (!matcher.matches()) {
            return;
        }

        final String actionChar = matcher.group(1);
        String name = matcher.group(2);
        String originalName = null;
        String originalRevision = null;
        ScmFileStatus action;

        switch (actionChar) {
            case "A":
                action = ScmFileStatus.ADDED;
                break;
            case "M":
                action = ScmFileStatus.MODIFIED;
                break;
            case "D":
                action = ScmFileStatus.DELETED;
                break;
            case "R":
                action = ScmFileStatus.RENAMED;
                // for renames the first path is the old name, the second one the new name
                originalName = name;
                name = matcher.group(4);
                originalRevision = currentChange.getParentRevision();
                break;
            case "C":
                action = ScmFileStatus.COPIED;
                // for copies the first path is the source, the second one the copy
                originalName = name;
                name = matcher.group(4);
                originalRevision = currentChange.getParentRevision();
                break;
            default:
                action = ScmFileStatus.UNKNOWN;
                break;
        }

        final ChangeFile changeFile = new ChangeFile(name, currentRevision);
        changeFile.setAction(action);
        changeFile.setOriginalName(originalName);
        changeFile.setOriginalRevision(originalRevision);
        currentChange.addFile(changeFile);
    }

    /**
     * Adds the entry being parsed (if any) to the result list, clears the per-entry state and
     * makes the parser expect a commit header again.
     */
    private void finishCurrentChange() {
        if (currentChange != null) {
            entries.add(currentChange);
        }

        resetChangeLog();

        status = STATUS_GET_HEADER;
    }

    /**
     * Clears the entry and the comment currently being parsed.
     */
    private void resetChangeLog() {
        currentComment = null;
        currentChange = null;
    }
}