001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.maven.scm.provider.git.gitexe.command.changelog; 020 021import java.util.ArrayList; 022import java.util.Calendar; 023import java.util.Date; 024import java.util.List; 025import java.util.Locale; 026import java.util.TimeZone; 027import java.util.regex.Matcher; 028import java.util.regex.Pattern; 029 030import org.apache.maven.scm.ChangeFile; 031import org.apache.maven.scm.ChangeSet; 032import org.apache.maven.scm.ScmFileStatus; 033import org.apache.maven.scm.util.AbstractConsumer; 034 035/** 036 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a> 037 * @author Olivier Lamy 038 */ 039public class GitChangeLogConsumer extends AbstractConsumer { 040 /** 041 * Date formatter for git timestamp 042 * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200. 043 */ 044 private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z"; 045 046 /** 047 * State machine constant: expecting header. 048 */ 049 private static final int STATUS_GET_HEADER = 1; 050 051 /** 052 * State machine constant: expecting author information. 053 */ 054 private static final int STATUS_GET_AUTHOR = 2; 055 056 /** 057 * State machine constant: expecting parent hash information. 058 */ 059 private static final int STATUS_RAW_TREE = 21; 060 061 /** 062 * State machine constant: expecting parent hash information. 063 */ 064 private static final int STATUS_RAW_PARENT = 22; 065 066 /** 067 * State machine constant: expecting author name, email and timestamp information. 068 */ 069 private static final int STATUS_RAW_AUTHOR = 23; 070 071 /** 072 * State machine constant: expecting committer name, email and timestamp information. 073 */ 074 private static final int STATUS_RAW_COMMITTER = 24; 075 076 /** 077 * State machine constant: expecting date information. 078 */ 079 private static final int STATUS_GET_DATE = 3; 080 081 /** 082 * State machine constant: expecting file information. 083 */ 084 private static final int STATUS_GET_FILE = 4; 085 086 /** 087 * State machine constant: expecting comments. 088 */ 089 private static final int STATUS_GET_COMMENT = 5; 090 091 /** 092 * The pattern used to match git header lines. 093 */ 094 private static final Pattern HEADER_PATTERN = Pattern.compile("^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$"); 095 096 /** 097 * The pattern used to match git author lines. 098 */ 099 private static final Pattern AUTHOR_PATTERN = Pattern.compile("^Author: (.*)"); 100 101 /** 102 * The pattern used to match git tree hash lines (raw mode) 103 */ 104 private static final Pattern RAW_TREE_PATTERN = Pattern.compile("^tree ([A-Fa-f0-9]+)"); 105 106 /** 107 * The pattern used to match git parent hash lines (raw mode) 108 */ 109 private static final Pattern RAW_PARENT_PATTERN = Pattern.compile("^parent ([A-Fa-f0-9]+)"); 110 111 /** 112 * The pattern used to match git author lines (raw mode) 113 */ 114 private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile("^author (.+ <.+>) ([0-9]+) (.*)"); 115 116 /** 117 * The pattern used to match git author lines (raw mode) 118 */ 119 private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile("^committer (.+ <.+>) ([0-9]+) (.*)"); 120 121 /** 122 * The pattern used to match git date lines. 123 */ 124 private static final Pattern DATE_PATTERN = Pattern.compile("^Date:\\s*(.*)"); 125 126 /** 127 * The pattern used to match git file lines. 128 */ 129 private static final Pattern FILE_PATTERN = 130 Pattern.compile("^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?"); 131 132 /** 133 * Current status of the parser. 134 */ 135 private int status = STATUS_GET_HEADER; 136 137 /** 138 * List of change log entries. 139 */ 140 private final List<ChangeSet> entries = new ArrayList<>(); 141 142 /** 143 * The current log entry being processed by the parser. 144 */ 145 private ChangeSet currentChange; 146 147 /** 148 * The current revision of the entry being processed by the parser. 149 */ 150 private String currentRevision; 151 152 /** 153 * The current comment of the entry being processed by the parser. 154 */ 155 private StringBuilder currentComment; 156 157 private final String userDateFormat; 158 159 /** 160 * Default constructor. 161 */ 162 public GitChangeLogConsumer(String userDateFormat) { 163 this.userDateFormat = userDateFormat; 164 } 165 166 public List<ChangeSet> getModifications() { 167 // this is needed since the processFile does not always get a the end-sequence correctly. 168 processGetFile(""); 169 170 return entries; 171 } 172 173 // ---------------------------------------------------------------------- 174 // StreamConsumer Implementation 175 // ---------------------------------------------------------------------- 176 177 /** 178 * {@inheritDoc} 179 */ 180 public void consumeLine(String line) { 181 switch (status) { 182 case STATUS_GET_HEADER: 183 processGetHeader(line); 184 break; 185 case STATUS_GET_AUTHOR: 186 processGetAuthor(line); 187 break; 188 case STATUS_GET_DATE: 189 processGetDate(line, null); 190 break; 191 case STATUS_GET_COMMENT: 192 processGetComment(line); 193 break; 194 case STATUS_GET_FILE: 195 processGetFile(line); 196 break; 197 case STATUS_RAW_TREE: 198 processGetRawTree(line); 199 break; 200 case STATUS_RAW_PARENT: 201 processGetRawParent(line); 202 break; 203 case STATUS_RAW_AUTHOR: 204 processGetRawAuthor(line); 205 break; 206 case STATUS_RAW_COMMITTER: 207 processGetRawCommitter(line); 208 break; 209 default: 210 throw new IllegalStateException("Unknown state: " + status); 211 } 212 } 213 214 // ---------------------------------------------------------------------- 215 // 216 // ---------------------------------------------------------------------- 217 218 /** 219 * Process the current input line in the GET_HEADER state. The 220 * author, date, and the revision of the entry are gathered. Note, 221 * Git does not have per-file revisions, instead, the entire 222 * branch is given a single revision number, which is also used for 223 * the revision number of each file. 224 * 225 * @param line a line of text from the git log output 226 */ 227 private void processGetHeader(String line) { 228 Matcher matcher = HEADER_PATTERN.matcher(line); 229 if (!matcher.matches()) { 230 return; 231 } 232 233 currentRevision = matcher.group(1); 234 235 currentChange = new ChangeSet(); 236 237 currentChange.setRevision(currentRevision); 238 239 // Extract the tags (if present) 240 String tagList = matcher.group(2); 241 if (tagList != null) { 242 String[] rawTags = tagList.split(","); 243 for (String rawTag : rawTags) { 244 String[] tagParts = rawTag.trim().split(":"); 245 if (tagParts.length == 2 && "tag".equals(tagParts[0])) { 246 currentChange.addTag(tagParts[1].trim()); 247 } 248 } 249 } 250 251 status = STATUS_GET_AUTHOR; 252 } 253 254 /** 255 * Process the current input line in the STATUS_GET_AUTHOR state. This 256 * state gathers all of the author information that are part of a log entry. 257 * 258 * @param line a line of text from the git log output 259 */ 260 private void processGetAuthor(String line) { 261 // this autodetects 'raw' format 262 if (RAW_TREE_PATTERN.matcher(line).matches()) { 263 status = STATUS_RAW_TREE; 264 processGetRawTree(line); 265 return; 266 } 267 268 Matcher matcher = AUTHOR_PATTERN.matcher(line); 269 if (!matcher.matches()) { 270 return; 271 } 272 String author = matcher.group(1); 273 274 currentChange.setAuthor(author); 275 276 status = STATUS_GET_DATE; 277 } 278 279 /** 280 * Process the current input line in the STATUS_RAW_TREE state. This 281 * state gathers tree hash part of a log entry. 282 * 283 * @param line a line of text from the git log output 284 */ 285 private void processGetRawTree(String line) { 286 if (!RAW_TREE_PATTERN.matcher(line).matches()) { 287 return; 288 } 289 // here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) ); 290 status = STATUS_RAW_PARENT; 291 } 292 293 /** 294 * Process the current input line in the STATUS_RAW_PARENT state. This 295 * state gathers parent revisions of a log entry. 296 * 297 * @param line a line of text from the git log output 298 */ 299 private void processGetRawParent(String line) { 300 Matcher matcher = RAW_PARENT_PATTERN.matcher(line); 301 if (!matcher.matches()) { 302 status = STATUS_RAW_AUTHOR; 303 processGetRawAuthor(line); 304 return; 305 } 306 String parentHash = matcher.group(1); 307 308 addParentRevision(parentHash); 309 } 310 311 /** 312 * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the 313 * log. This method takes care of the difference. 314 * 315 * @param hash - 316 */ 317 private void addParentRevision(String hash) { 318 if (currentChange.getParentRevision() == null) { 319 currentChange.setParentRevision(hash); 320 } else { 321 currentChange.addMergedRevision(hash); 322 } 323 } 324 325 /** 326 * Process the current input line in the STATUS_RAW_AUTHOR state. This 327 * state gathers all the author information of a log entry. 328 * 329 * @param line a line of text from the git log output 330 */ 331 private void processGetRawAuthor(String line) { 332 Matcher matcher = RAW_AUTHOR_PATTERN.matcher(line); 333 if (!matcher.matches()) { 334 return; 335 } 336 String author = matcher.group(1); 337 currentChange.setAuthor(author); 338 339 String datestring = matcher.group(2); 340 String tz = matcher.group(3); 341 342 // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time 343 // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here 344 Calendar c = Calendar.getInstance(TimeZone.getTimeZone(tz)); 345 c.setTimeInMillis(Long.parseLong(datestring) * 1000); 346 currentChange.setDate(c.getTime()); 347 348 status = STATUS_RAW_COMMITTER; 349 } 350 351 /** 352 * Process the current input line in the STATUS_RAW_AUTHOR state. This 353 * state gathers all the committer information of a log entry. 354 * 355 * @param line a line of text from the git log output 356 */ 357 private void processGetRawCommitter(String line) { 358 if (!RAW_COMMITTER_PATTERN.matcher(line).matches()) { 359 return; 360 } 361 // here we could set committer and committerDate, the same way as in processGetRawAuthor 362 status = STATUS_GET_COMMENT; 363 } 364 365 /** 366 * Process the current input line in the STATUS_GET_DATE state. This 367 * state gathers all of the date information that are part of a log entry. 368 * 369 * @param line a line of text from the git log output 370 */ 371 private void processGetDate(String line, Locale locale) { 372 Matcher matcher = DATE_PATTERN.matcher(line); 373 if (!matcher.matches()) { 374 return; 375 } 376 377 String datestring = matcher.group(1); 378 379 Date date = parseDate(datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale); 380 381 currentChange.setDate(date); 382 383 status = STATUS_GET_COMMENT; 384 } 385 386 /** 387 * Process the current input line in the GET_COMMENT state. This 388 * state gathers all of the comments that are part of a log entry. 389 * 390 * @param line a line of text from the git log output 391 */ 392 private void processGetComment(String line) { 393 if (line.length() < 4) { 394 if (currentComment == null) { 395 currentComment = new StringBuilder(); 396 } else { 397 currentChange.setComment(currentComment.toString()); 398 status = STATUS_GET_FILE; 399 } 400 } else { 401 if (currentComment.length() > 0) { 402 currentComment.append('\n'); 403 } 404 405 currentComment.append(line.substring(4)); 406 } 407 } 408 409 /** 410 * Process the current input line in the GET_FILE state. This state 411 * adds each file entry line to the current change log entry. Note, 412 * the revision number for the entire entry is used for the revision 413 * number of each file. 414 * 415 * @param line a line of text from the git log output 416 */ 417 private void processGetFile(String line) { 418 if (line.length() == 0) { 419 if (currentChange != null) { 420 entries.add(currentChange); 421 } 422 423 resetChangeLog(); 424 425 status = STATUS_GET_HEADER; 426 } else { 427 Matcher matcher = FILE_PATTERN.matcher(line); 428 if (!matcher.matches()) { 429 return; 430 } 431 final String actionChar = matcher.group(1); 432 // action is currently not used 433 final ScmFileStatus action; 434 String name = matcher.group(2); 435 String originalName = null; 436 String originalRevision = null; 437 if ("A".equals(actionChar)) { 438 action = ScmFileStatus.ADDED; 439 } else if ("M".equals(actionChar)) { 440 action = ScmFileStatus.MODIFIED; 441 } else if ("D".equals(actionChar)) { 442 action = ScmFileStatus.DELETED; 443 } else if ("R".equals(actionChar)) { 444 action = ScmFileStatus.RENAMED; 445 originalName = name; 446 name = matcher.group(4); 447 originalRevision = currentChange.getParentRevision(); 448 } else if ("C".equals(actionChar)) { 449 action = ScmFileStatus.COPIED; 450 originalName = name; 451 name = matcher.group(4); 452 originalRevision = currentChange.getParentRevision(); 453 } else { 454 action = ScmFileStatus.UNKNOWN; 455 } 456 457 final ChangeFile changeFile = new ChangeFile(name, currentRevision); 458 changeFile.setAction(action); 459 changeFile.setOriginalName(originalName); 460 changeFile.setOriginalRevision(originalRevision); 461 currentChange.addFile(changeFile); 462 } 463 } 464 465 private void resetChangeLog() { 466 currentComment = null; 467 currentChange = null; 468 } 469}