001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.maven.scm.provider.git.gitexe.command.changelog;
020
021import java.util.ArrayList;
022import java.util.Calendar;
023import java.util.Date;
024import java.util.List;
025import java.util.Locale;
026import java.util.TimeZone;
027import java.util.regex.Matcher;
028import java.util.regex.Pattern;
029
030import org.apache.maven.scm.ChangeFile;
031import org.apache.maven.scm.ChangeSet;
032import org.apache.maven.scm.ScmFileStatus;
033import org.apache.maven.scm.util.AbstractConsumer;
034
035/**
036 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
037 * @author Olivier Lamy
038 */
039public class GitChangeLogConsumer extends AbstractConsumer {
040    /**
041     * Date formatter for git timestamp
042     * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200.
043     */
044    private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
045
046    /**
047     * State machine constant: expecting header.
048     */
049    private static final int STATUS_GET_HEADER = 1;
050
051    /**
052     * State machine constant: expecting author information.
053     */
054    private static final int STATUS_GET_AUTHOR = 2;
055
056    /**
057     * State machine constant: expecting parent hash information.
058     */
059    private static final int STATUS_RAW_TREE = 21;
060
061    /**
062     * State machine constant: expecting parent hash information.
063     */
064    private static final int STATUS_RAW_PARENT = 22;
065
066    /**
067     * State machine constant: expecting author name, email and timestamp information.
068     */
069    private static final int STATUS_RAW_AUTHOR = 23;
070
071    /**
072     * State machine constant: expecting committer name, email and timestamp information.
073     */
074    private static final int STATUS_RAW_COMMITTER = 24;
075
076    /**
077     * State machine constant: expecting date information.
078     */
079    private static final int STATUS_GET_DATE = 3;
080
081    /**
082     * State machine constant: expecting file information.
083     */
084    private static final int STATUS_GET_FILE = 4;
085
086    /**
087     * State machine constant: expecting comments.
088     */
089    private static final int STATUS_GET_COMMENT = 5;
090
091    /**
092     * The pattern used to match git header lines.
093     */
094    private static final Pattern HEADER_PATTERN = Pattern.compile("^commit ([A-Fa-f0-9]+)(?: \\((.*)\\))?$");
095
096    /**
097     * The pattern used to match git author lines.
098     */
099    private static final Pattern AUTHOR_PATTERN = Pattern.compile("^Author: (.*)");
100
101    /**
102     * The pattern used to match git tree hash lines (raw mode)
103     */
104    private static final Pattern RAW_TREE_PATTERN = Pattern.compile("^tree ([A-Fa-f0-9]+)");
105
106    /**
107     * The pattern used to match git parent hash lines (raw mode)
108     */
109    private static final Pattern RAW_PARENT_PATTERN = Pattern.compile("^parent ([A-Fa-f0-9]+)");
110
111    /**
112     * The pattern used to match git author lines (raw mode)
113     */
114    private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile("^author (.+ <.+>) ([0-9]+) (.*)");
115
116    /**
117     * The pattern used to match git author lines (raw mode)
118     */
119    private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile("^committer (.+ <.+>) ([0-9]+) (.*)");
120
121    /**
122     * The pattern used to match git date lines.
123     */
124    private static final Pattern DATE_PATTERN = Pattern.compile("^Date:\\s*(.*)");
125
126    /**
127     * The pattern used to match git file lines.
128     */
129    private static final Pattern FILE_PATTERN =
130            Pattern.compile("^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?");
131
132    /**
133     * Current status of the parser.
134     */
135    private int status = STATUS_GET_HEADER;
136
137    /**
138     * List of change log entries.
139     */
140    private final List<ChangeSet> entries = new ArrayList<>();
141
142    /**
143     * The current log entry being processed by the parser.
144     */
145    private ChangeSet currentChange;
146
147    /**
148     * The current revision of the entry being processed by the parser.
149     */
150    private String currentRevision;
151
152    /**
153     * The current comment of the entry being processed by the parser.
154     */
155    private StringBuilder currentComment;
156
157    private final String userDateFormat;
158
159    /**
160     * Default constructor.
161     */
162    public GitChangeLogConsumer(String userDateFormat) {
163        this.userDateFormat = userDateFormat;
164    }
165
166    public List<ChangeSet> getModifications() {
167        // this is needed since the processFile does not always get a the end-sequence correctly.
168        processGetFile("");
169
170        return entries;
171    }
172
173    // ----------------------------------------------------------------------
174    // StreamConsumer Implementation
175    // ----------------------------------------------------------------------
176
177    /**
178     * {@inheritDoc}
179     */
180    public void consumeLine(String line) {
181        switch (status) {
182            case STATUS_GET_HEADER:
183                processGetHeader(line);
184                break;
185            case STATUS_GET_AUTHOR:
186                processGetAuthor(line);
187                break;
188            case STATUS_GET_DATE:
189                processGetDate(line, null);
190                break;
191            case STATUS_GET_COMMENT:
192                processGetComment(line);
193                break;
194            case STATUS_GET_FILE:
195                processGetFile(line);
196                break;
197            case STATUS_RAW_TREE:
198                processGetRawTree(line);
199                break;
200            case STATUS_RAW_PARENT:
201                processGetRawParent(line);
202                break;
203            case STATUS_RAW_AUTHOR:
204                processGetRawAuthor(line);
205                break;
206            case STATUS_RAW_COMMITTER:
207                processGetRawCommitter(line);
208                break;
209            default:
210                throw new IllegalStateException("Unknown state: " + status);
211        }
212    }
213
214    // ----------------------------------------------------------------------
215    //
216    // ----------------------------------------------------------------------
217
218    /**
219     * Process the current input line in the GET_HEADER state.  The
220     * author, date, and the revision of the entry are gathered.  Note,
221     * Git does not have per-file revisions, instead, the entire
222     * branch is given a single revision number, which is also used for
223     * the revision number of each file.
224     *
225     * @param line a line of text from the git log output
226     */
227    private void processGetHeader(String line) {
228        Matcher matcher = HEADER_PATTERN.matcher(line);
229        if (!matcher.matches()) {
230            return;
231        }
232
233        currentRevision = matcher.group(1);
234
235        currentChange = new ChangeSet();
236
237        currentChange.setRevision(currentRevision);
238
239        // Extract the tags (if present)
240        String tagList = matcher.group(2);
241        if (tagList != null) {
242            String[] rawTags = tagList.split(",");
243            for (String rawTag : rawTags) {
244                String[] tagParts = rawTag.trim().split(":");
245                if (tagParts.length == 2 && "tag".equals(tagParts[0])) {
246                    currentChange.addTag(tagParts[1].trim());
247                }
248            }
249        }
250
251        status = STATUS_GET_AUTHOR;
252    }
253
254    /**
255     * Process the current input line in the STATUS_GET_AUTHOR state.  This
256     * state gathers all of the author information that are part of a log entry.
257     *
258     * @param line a line of text from the git log output
259     */
260    private void processGetAuthor(String line) {
261        // this autodetects 'raw' format
262        if (RAW_TREE_PATTERN.matcher(line).matches()) {
263            status = STATUS_RAW_TREE;
264            processGetRawTree(line);
265            return;
266        }
267
268        Matcher matcher = AUTHOR_PATTERN.matcher(line);
269        if (!matcher.matches()) {
270            return;
271        }
272        String author = matcher.group(1);
273
274        currentChange.setAuthor(author);
275
276        status = STATUS_GET_DATE;
277    }
278
279    /**
280     * Process the current input line in the STATUS_RAW_TREE state.  This
281     * state gathers tree hash part of a log entry.
282     *
283     * @param line a line of text from the git log output
284     */
285    private void processGetRawTree(String line) {
286        if (!RAW_TREE_PATTERN.matcher(line).matches()) {
287            return;
288        }
289        // here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
290        status = STATUS_RAW_PARENT;
291    }
292
293    /**
294     * Process the current input line in the STATUS_RAW_PARENT state.  This
295     * state gathers parent revisions of a log entry.
296     *
297     * @param line a line of text from the git log output
298     */
299    private void processGetRawParent(String line) {
300        Matcher matcher = RAW_PARENT_PATTERN.matcher(line);
301        if (!matcher.matches()) {
302            status = STATUS_RAW_AUTHOR;
303            processGetRawAuthor(line);
304            return;
305        }
306        String parentHash = matcher.group(1);
307
308        addParentRevision(parentHash);
309    }
310
311    /**
312     * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
313     * log. This method takes care of the difference.
314     *
315     * @param hash -
316     */
317    private void addParentRevision(String hash) {
318        if (currentChange.getParentRevision() == null) {
319            currentChange.setParentRevision(hash);
320        } else {
321            currentChange.addMergedRevision(hash);
322        }
323    }
324
325    /**
326     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
327     * state gathers all the author information of a log entry.
328     *
329     * @param line a line of text from the git log output
330     */
331    private void processGetRawAuthor(String line) {
332        Matcher matcher = RAW_AUTHOR_PATTERN.matcher(line);
333        if (!matcher.matches()) {
334            return;
335        }
336        String author = matcher.group(1);
337        currentChange.setAuthor(author);
338
339        String datestring = matcher.group(2);
340        String tz = matcher.group(3);
341
342        // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
343        // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
344        Calendar c = Calendar.getInstance(TimeZone.getTimeZone(tz));
345        c.setTimeInMillis(Long.parseLong(datestring) * 1000);
346        currentChange.setDate(c.getTime());
347
348        status = STATUS_RAW_COMMITTER;
349    }
350
351    /**
352     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
353     * state gathers all the committer information of a log entry.
354     *
355     * @param line a line of text from the git log output
356     */
357    private void processGetRawCommitter(String line) {
358        if (!RAW_COMMITTER_PATTERN.matcher(line).matches()) {
359            return;
360        }
361        // here we could set committer and committerDate, the same way as in processGetRawAuthor
362        status = STATUS_GET_COMMENT;
363    }
364
365    /**
366     * Process the current input line in the STATUS_GET_DATE state.  This
367     * state gathers all of the date information that are part of a log entry.
368     *
369     * @param line a line of text from the git log output
370     */
371    private void processGetDate(String line, Locale locale) {
372        Matcher matcher = DATE_PATTERN.matcher(line);
373        if (!matcher.matches()) {
374            return;
375        }
376
377        String datestring = matcher.group(1);
378
379        Date date = parseDate(datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale);
380
381        currentChange.setDate(date);
382
383        status = STATUS_GET_COMMENT;
384    }
385
386    /**
387     * Process the current input line in the GET_COMMENT state.  This
388     * state gathers all of the comments that are part of a log entry.
389     *
390     * @param line a line of text from the git log output
391     */
392    private void processGetComment(String line) {
393        if (line.length() < 4) {
394            if (currentComment == null) {
395                currentComment = new StringBuilder();
396            } else {
397                currentChange.setComment(currentComment.toString());
398                status = STATUS_GET_FILE;
399            }
400        } else {
401            if (currentComment.length() > 0) {
402                currentComment.append('\n');
403            }
404
405            currentComment.append(line.substring(4));
406        }
407    }
408
409    /**
410     * Process the current input line in the GET_FILE state.  This state
411     * adds each file entry line to the current change log entry.  Note,
412     * the revision number for the entire entry is used for the revision
413     * number of each file.
414     *
415     * @param line a line of text from the git log output
416     */
417    private void processGetFile(String line) {
418        if (line.length() == 0) {
419            if (currentChange != null) {
420                entries.add(currentChange);
421            }
422
423            resetChangeLog();
424
425            status = STATUS_GET_HEADER;
426        } else {
427            Matcher matcher = FILE_PATTERN.matcher(line);
428            if (!matcher.matches()) {
429                return;
430            }
431            final String actionChar = matcher.group(1);
432            // action is currently not used
433            final ScmFileStatus action;
434            String name = matcher.group(2);
435            String originalName = null;
436            String originalRevision = null;
437            if ("A".equals(actionChar)) {
438                action = ScmFileStatus.ADDED;
439            } else if ("M".equals(actionChar)) {
440                action = ScmFileStatus.MODIFIED;
441            } else if ("D".equals(actionChar)) {
442                action = ScmFileStatus.DELETED;
443            } else if ("R".equals(actionChar)) {
444                action = ScmFileStatus.RENAMED;
445                originalName = name;
446                name = matcher.group(4);
447                originalRevision = currentChange.getParentRevision();
448            } else if ("C".equals(actionChar)) {
449                action = ScmFileStatus.COPIED;
450                originalName = name;
451                name = matcher.group(4);
452                originalRevision = currentChange.getParentRevision();
453            } else {
454                action = ScmFileStatus.UNKNOWN;
455            }
456
457            final ChangeFile changeFile = new ChangeFile(name, currentRevision);
458            changeFile.setAction(action);
459            changeFile.setOriginalName(originalName);
460            changeFile.setOriginalRevision(originalRevision);
461            currentChange.addFile(changeFile);
462        }
463    }
464
465    private void resetChangeLog() {
466        currentComment = null;
467        currentChange = null;
468    }
469}