1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.scm.provider.git.gitexe.command.changelog;
20
21 import java.util.ArrayList;
22 import java.util.Calendar;
23 import java.util.Date;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.TimeZone;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29
30 import org.apache.maven.scm.ChangeFile;
31 import org.apache.maven.scm.ChangeSet;
32 import org.apache.maven.scm.ScmFileStatus;
33 import org.apache.maven.scm.util.AbstractConsumer;
34
35 /**
36 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
37 * @author Olivier Lamy
38 *
39 */
40 public class GitChangeLogConsumer extends AbstractConsumer {
41 /**
42 * Date formatter for git timestamp
43 * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
44 */
45 private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
46
47 /**
48 * State machine constant: expecting header
49 */
50 private static final int STATUS_GET_HEADER = 1;
51
52 /**
53 * State machine constant: expecting author information
54 */
55 private static final int STATUS_GET_AUTHOR = 2;
56
57 /**
58 * State machine constant: expecting parent hash information
59 */
60 private static final int STATUS_RAW_TREE = 21;
61
62 /**
63 * State machine constant: expecting parent hash information
64 */
65 private static final int STATUS_RAW_PARENT = 22;
66
67 /**
68 * State machine constant: expecting author name, email and timestamp information
69 */
70 private static final int STATUS_RAW_AUTHOR = 23;
71
72 /**
73 * State machine constant: expecting committer name, email and timestamp information
74 */
75 private static final int STATUS_RAW_COMMITTER = 24;
76
77 /**
78 * State machine constant: expecting date information
79 */
80 private static final int STATUS_GET_DATE = 3;
81
82 /**
83 * State machine constant: expecting file information
84 */
85 private static final int STATUS_GET_FILE = 4;
86
87 /**
88 * State machine constant: expecting comments
89 */
90 private static final int STATUS_GET_COMMENT = 5;
91
92 /**
93 * The pattern used to match git header lines
94 */
95 private static final Pattern HEADER_PATTERN = Pattern.compile("^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$");
96
97 /**
98 * The pattern used to match git author lines
99 */
100 private static final Pattern AUTHOR_PATTERN = Pattern.compile("^Author: (.*)");
101
102 /**
103 * The pattern used to match git tree hash lines (raw mode)
104 */
105 private static final Pattern RAW_TREE_PATTERN = Pattern.compile("^tree ([A-Fa-f0-9]+)");
106
107 /**
108 * The pattern used to match git parent hash lines (raw mode)
109 */
110 private static final Pattern RAW_PARENT_PATTERN = Pattern.compile("^parent ([A-Fa-f0-9]+)");
111
112 /**
113 * The pattern used to match git author lines (raw mode)
114 */
115 private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile("^author (.+ <.+>) ([0-9]+) (.*)");
116
117 /**
118 * The pattern used to match git author lines (raw mode)
119 */
120 private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile("^committer (.+ <.+>) ([0-9]+) (.*)");
121
122 /**
123 * The pattern used to match git date lines
124 */
125 private static final Pattern DATE_PATTERN = Pattern.compile("^Date:\\s*(.*)");
126
127 /**
128 * The pattern used to match git file lines
129 */
130 private static final Pattern FILE_PATTERN =
131 Pattern.compile("^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?");
132
133 /**
134 * Current status of the parser
135 */
136 private int status = STATUS_GET_HEADER;
137
138 /**
139 * List of change log entries
140 */
141 private final List<ChangeSet> entries = new ArrayList<>();
142
143 /**
144 * The current log entry being processed by the parser
145 */
146 private ChangeSet currentChange;
147
148 /**
149 * The current revision of the entry being processed by the parser
150 */
151 private String currentRevision;
152
153 /**
154 * The current comment of the entry being processed by the parser
155 */
156 private StringBuilder currentComment;
157
158 private final String userDateFormat;
159
160 /**
161 * Default constructor.
162 */
163 public GitChangeLogConsumer(String userDateFormat) {
164 this.userDateFormat = userDateFormat;
165 }
166
167 public List<ChangeSet> getModifications() {
168 // this is needed since the processFile does not always get a the end-sequence correctly.
169 processGetFile("");
170
171 return entries;
172 }
173
174 // ----------------------------------------------------------------------
175 // StreamConsumer Implementation
176 // ----------------------------------------------------------------------
177
178 /**
179 * {@inheritDoc}
180 */
181 public void consumeLine(String line) {
182 switch (status) {
183 case STATUS_GET_HEADER:
184 processGetHeader(line);
185 break;
186 case STATUS_GET_AUTHOR:
187 processGetAuthor(line);
188 break;
189 case STATUS_GET_DATE:
190 processGetDate(line, null);
191 break;
192 case STATUS_GET_COMMENT:
193 processGetComment(line);
194 break;
195 case STATUS_GET_FILE:
196 processGetFile(line);
197 break;
198 case STATUS_RAW_TREE:
199 processGetRawTree(line);
200 break;
201 case STATUS_RAW_PARENT:
202 processGetRawParent(line);
203 break;
204 case STATUS_RAW_AUTHOR:
205 processGetRawAuthor(line);
206 break;
207 case STATUS_RAW_COMMITTER:
208 processGetRawCommitter(line);
209 break;
210 default:
211 throw new IllegalStateException("Unknown state: " + status);
212 }
213 }
214
215 // ----------------------------------------------------------------------
216 //
217 // ----------------------------------------------------------------------
218
219 /**
220 * Process the current input line in the GET_HEADER state. The
221 * author, date, and the revision of the entry are gathered. Note,
222 * Git does not have per-file revisions, instead, the entire
223 * branch is given a single revision number, which is also used for
224 * the revision number of each file.
225 *
226 * @param line A line of text from the git log output
227 */
228 private void processGetHeader(String line) {
229 Matcher matcher = HEADER_PATTERN.matcher(line);
230 if (!matcher.matches()) {
231 return;
232 }
233
234 currentRevision = matcher.group(1);
235
236 currentChange = new ChangeSet();
237
238 currentChange.setRevision(currentRevision);
239
240 // Extract the tags (if present)
241 String tagList = matcher.group(2);
242 if (tagList != null) {
243 String[] rawTags = tagList.split(",");
244 for (String rawTag : rawTags) {
245 String[] tagParts = rawTag.trim().split(":");
246 if (tagParts.length == 2 && "tag".equals(tagParts[0])) {
247 currentChange.addTag(tagParts[1].trim());
248 }
249 }
250 }
251
252 status = STATUS_GET_AUTHOR;
253 }
254
255 /**
256 * Process the current input line in the STATUS_GET_AUTHOR state. This
257 * state gathers all of the author information that are part of a log entry.
258 *
259 * @param line a line of text from the git log output
260 */
261 private void processGetAuthor(String line) {
262 // this autodetects 'raw' format
263 if (RAW_TREE_PATTERN.matcher(line).matches()) {
264 status = STATUS_RAW_TREE;
265 processGetRawTree(line);
266 return;
267 }
268
269 Matcher matcher = AUTHOR_PATTERN.matcher(line);
270 if (!matcher.matches()) {
271 return;
272 }
273 String author = matcher.group(1);
274
275 currentChange.setAuthor(author);
276
277 status = STATUS_GET_DATE;
278 }
279
280 /**
281 * Process the current input line in the STATUS_RAW_TREE state. This
282 * state gathers tree hash part of a log entry.
283 *
284 * @param line a line of text from the git log output
285 */
286 private void processGetRawTree(String line) {
287 if (!RAW_TREE_PATTERN.matcher(line).matches()) {
288 return;
289 }
290 // here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
291 status = STATUS_RAW_PARENT;
292 }
293
294 /**
295 * Process the current input line in the STATUS_RAW_PARENT state. This
296 * state gathers parent revisions of a log entry.
297 *
298 * @param line a line of text from the git log output
299 */
300 private void processGetRawParent(String line) {
301 Matcher matcher = RAW_PARENT_PATTERN.matcher(line);
302 if (!matcher.matches()) {
303 status = STATUS_RAW_AUTHOR;
304 processGetRawAuthor(line);
305 return;
306 }
307 String parentHash = matcher.group(1);
308
309 addParentRevision(parentHash);
310 }
311
312 /**
313 * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
314 * log. This method takes care of the difference.
315 *
316 * @param hash -
317 */
318 private void addParentRevision(String hash) {
319 if (currentChange.getParentRevision() == null) {
320 currentChange.setParentRevision(hash);
321 } else {
322 currentChange.addMergedRevision(hash);
323 }
324 }
325
326 /**
327 * Process the current input line in the STATUS_RAW_AUTHOR state. This
328 * state gathers all the author information of a log entry.
329 *
330 * @param line a line of text from the git log output
331 */
332 private void processGetRawAuthor(String line) {
333 Matcher matcher = RAW_AUTHOR_PATTERN.matcher(line);
334 if (!matcher.matches()) {
335 return;
336 }
337 String author = matcher.group(1);
338 currentChange.setAuthor(author);
339
340 String datestring = matcher.group(2);
341 String tz = matcher.group(3);
342
343 // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
344 // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
345 Calendar c = Calendar.getInstance(TimeZone.getTimeZone(tz));
346 c.setTimeInMillis(Long.parseLong(datestring) * 1000);
347 currentChange.setDate(c.getTime());
348
349 status = STATUS_RAW_COMMITTER;
350 }
351
352 /**
353 * Process the current input line in the STATUS_RAW_AUTHOR state. This
354 * state gathers all the committer information of a log entry.
355 *
356 * @param line a line of text from the git log output
357 */
358 private void processGetRawCommitter(String line) {
359 if (!RAW_COMMITTER_PATTERN.matcher(line).matches()) {
360 return;
361 }
362 // here we could set committer and committerDate, the same way as in processGetRawAuthor
363 status = STATUS_GET_COMMENT;
364 }
365
366 /**
367 * Process the current input line in the STATUS_GET_DATE state. This
368 * state gathers all of the date information that are part of a log entry.
369 *
370 * @param line a line of text from the git log output
371 */
372 private void processGetDate(String line, Locale locale) {
373 Matcher matcher = DATE_PATTERN.matcher(line);
374 if (!matcher.matches()) {
375 return;
376 }
377
378 String datestring = matcher.group(1);
379
380 Date date = parseDate(datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale);
381
382 currentChange.setDate(date);
383
384 status = STATUS_GET_COMMENT;
385 }
386
387 /**
388 * Process the current input line in the GET_COMMENT state. This
389 * state gathers all of the comments that are part of a log entry.
390 *
391 * @param line a line of text from the git log output
392 */
393 private void processGetComment(String line) {
394 if (line.length() < 4) {
395 if (currentComment == null) {
396 currentComment = new StringBuilder();
397 } else {
398 currentChange.setComment(currentComment.toString());
399 status = STATUS_GET_FILE;
400 }
401 } else {
402 if (currentComment.length() > 0) {
403 currentComment.append('\n');
404 }
405
406 currentComment.append(line.substring(4));
407 }
408 }
409
410 /**
411 * Process the current input line in the GET_FILE state. This state
412 * adds each file entry line to the current change log entry. Note,
413 * the revision number for the entire entry is used for the revision
414 * number of each file.
415 *
416 * @param line A line of text from the git log output
417 */
418 private void processGetFile(String line) {
419 if (line.length() == 0) {
420 if (currentChange != null) {
421 entries.add(currentChange);
422 }
423
424 resetChangeLog();
425
426 status = STATUS_GET_HEADER;
427 } else {
428 Matcher matcher = FILE_PATTERN.matcher(line);
429 if (!matcher.matches()) {
430 return;
431 }
432 final String actionChar = matcher.group(1);
433 // action is currently not used
434 final ScmFileStatus action;
435 String name = matcher.group(2);
436 String originalName = null;
437 String originalRevision = null;
438 if ("A".equals(actionChar)) {
439 action = ScmFileStatus.ADDED;
440 } else if ("M".equals(actionChar)) {
441 action = ScmFileStatus.MODIFIED;
442 } else if ("D".equals(actionChar)) {
443 action = ScmFileStatus.DELETED;
444 } else if ("R".equals(actionChar)) {
445 action = ScmFileStatus.RENAMED;
446 originalName = name;
447 name = matcher.group(4);
448 originalRevision = currentChange.getParentRevision();
449 } else if ("C".equals(actionChar)) {
450 action = ScmFileStatus.COPIED;
451 originalName = name;
452 name = matcher.group(4);
453 originalRevision = currentChange.getParentRevision();
454 } else {
455 action = ScmFileStatus.UNKNOWN;
456 }
457
458 final ChangeFile changeFile = new ChangeFile(name, currentRevision);
459 changeFile.setAction(action);
460 changeFile.setOriginalName(originalName);
461 changeFile.setOriginalRevision(originalRevision);
462 currentChange.addFile(changeFile);
463 }
464 }
465
466 private void resetChangeLog() {
467 currentComment = null;
468 currentChange = null;
469 }
470 }