git_command: Augment underlying git errors with suggestions

This change appends suggestions to the underlying git error to make the
error slightly more actionable.

DD: go/improve-repo-error-reporting & go/tee-repo-stderr

Bug: b/292704435
Change-Id: I2bf8bea5fca42c6a9acd2fadc70f58f22456e027
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/387774
Commit-Queue: Aravind Vasudevan <aravindvasudev@google.com>
Reviewed-by: Jason Chang <jasonnc@google.com>
Tested-by: Aravind Vasudevan <aravindvasudev@google.com>
Reviewed-by: Aravind Vasudevan <aravindvasudev@google.com>
diff --git a/git_command.py b/git_command.py
index fe1e48d..ef6e321 100644
--- a/git_command.py
+++ b/git_command.py
@@ -15,6 +15,7 @@
 import functools
 import json
 import os
+import re
 import subprocess
 import sys
 from typing import Any, Optional
@@ -24,6 +25,7 @@
 from git_refs import HEAD
 from git_trace2_event_log_base import BaseEventLog
 import platform_utils
+from repo_logging import RepoLogger
 from repo_trace import IsTrace
 from repo_trace import REPO_TRACE
 from repo_trace import Trace
@@ -50,9 +52,11 @@
 ERROR_EVENT_LOGGING_PREFIX = "RepoGitCommandError"
 # Common line length limit
 GIT_ERROR_STDOUT_LINES = 1
-GIT_ERROR_STDERR_LINES = 1
+GIT_ERROR_STDERR_LINES = 10
 INVALID_GIT_EXIT_CODE = 126
 
+logger = RepoLogger(__file__)
+
 
 class _GitCall(object):
     @functools.lru_cache(maxsize=None)
@@ -60,7 +64,7 @@
         ret = Wrapper().ParseGitVersion()
         if ret is None:
             msg = "fatal: unable to detect git version"
-            print(msg, file=sys.stderr)
+            logger.error(msg)
             raise GitRequireError(msg)
         return ret
 
@@ -135,10 +139,11 @@
         # `git config --get` is documented to produce an exit status of `1`
         # if the requested variable is not present in the configuration.
         # Report any other return value as an error.
-        print(
+        logger.error(
             "repo: error: 'git config --get' call failed with return code: "
-            "%r, stderr: %r" % (retval, p.stderr),
-            file=sys.stderr,
+            "%r, stderr: %r",
+            retval,
+            p.stderr,
         )
     return path
 
@@ -212,7 +217,7 @@
         if msg:
             msg = " for " + msg
         error_msg = "fatal: git %s or later required%s" % (need, msg)
-        print(error_msg, file=sys.stderr)
+        logger.error(error_msg)
         raise GitRequireError(error_msg)
     return False
 
@@ -297,6 +302,7 @@
         self.project = project
         self.cmdv = cmdv
         self.verify_command = verify_command
+        self.stdout, self.stderr = None, None
 
         # Git on Windows wants its paths only using / for reliability.
         if platform_utils.isWindows():
@@ -326,14 +332,6 @@
                 command.append("--progress")
         command.extend(cmdv[1:])
 
-        stdin = subprocess.PIPE if input else None
-        stdout = subprocess.PIPE if capture_stdout else None
-        stderr = (
-            subprocess.STDOUT
-            if merge_output
-            else (subprocess.PIPE if capture_stderr else None)
-        )
-
         event_log = (
             BaseEventLog(env=env, add_init_count=True)
             if add_event_log
@@ -344,9 +342,9 @@
             self._RunCommand(
                 command,
                 env,
-                stdin=stdin,
-                stdout=stdout,
-                stderr=stderr,
+                capture_stdout=capture_stdout,
+                capture_stderr=capture_stderr,
+                merge_output=merge_output,
                 ssh_proxy=ssh_proxy,
                 cwd=cwd,
                 input=input,
@@ -377,13 +375,46 @@
         self,
         command,
         env,
-        stdin=None,
-        stdout=None,
-        stderr=None,
+        capture_stdout=False,
+        capture_stderr=False,
+        merge_output=False,
         ssh_proxy=None,
         cwd=None,
         input=None,
     ):
+        # Set subprocess.PIPE for streams that need to be captured.
+        stdin = subprocess.PIPE if input else None
+        stdout = subprocess.PIPE if capture_stdout else None
+        stderr = (
+            subprocess.STDOUT
+            if merge_output
+            else (subprocess.PIPE if capture_stderr else None)
+        )
+
+        # tee_stderr acts like a tee command for stderr, in that, it captures
+        # stderr from the subprocess and streams it back to sys.stderr, while
+        # keeping a copy in-memory.
+        # This allows us to store stderr logs from the subprocess into
+        # GitCommandError.
+        # Certain git operations, such as `git push`, writes diagnostic logs,
+        # such as, progress bar for pushing, into stderr. To ensure we don't
+        # break git's UX, we need to write to sys.stderr as we read from the
+        # subprocess. Setting encoding or errors makes subprocess return
+        # io.TextIOWrapper, which is line buffered. To avoid line-buffering
+        # while tee-ing stderr, we unset these kwargs. See GitCommand._Tee
+        # for tee-ing between the streams.
+        # We tee stderr iff the caller doesn't want to capture any stream to
+        # not disrupt the existing flow.
+        # See go/tee-repo-stderr for more context.
+        tee_stderr = False
+        kwargs = {"encoding": "utf-8", "errors": "backslashreplace"}
+        if not (stdin or stdout or stderr):
+            tee_stderr = True
+            # stderr will be written back to sys.stderr even though it is
+            # piped here.
+            stderr = subprocess.PIPE
+            kwargs = {}
+
         dbg = ""
         if IsTrace():
             global LAST_CWD
@@ -430,11 +461,10 @@
                     command,
                     cwd=cwd,
                     env=env,
-                    encoding="utf-8",
-                    errors="backslashreplace",
                     stdin=stdin,
                     stdout=stdout,
                     stderr=stderr,
+                    **kwargs,
                 )
             except Exception as e:
                 raise GitPopenCommandError(
@@ -449,13 +479,45 @@
             self.process = p
 
             try:
-                self.stdout, self.stderr = p.communicate(input=input)
+                if tee_stderr:
+                    # tee_stderr streams stderr to sys.stderr while capturing
+                    # a copy within self.stderr. tee_stderr is only enabled
+                    # when the caller wants to pipe no stream.
+                    self.stderr = self._Tee(p.stderr, sys.stderr)
+                else:
+                    self.stdout, self.stderr = p.communicate(input=input)
             finally:
                 if ssh_proxy:
                     ssh_proxy.remove_client(p)
             self.rc = p.wait()
 
     @staticmethod
+    def _Tee(in_stream, out_stream):
+        """Writes text from in_stream to out_stream while recording in buffer.
+
+        Args:
+            in_stream: I/O stream to be read from.
+            out_stream: I/O stream to write to.
+
+        Returns:
+            A str containing everything read from the in_stream.
+        """
+        buffer = ""
+        chunk = in_stream.read1()
+        while chunk:
+            # Convert to str.
+            if not hasattr(chunk, "encode"):
+                chunk = chunk.decode("utf-8", "backslashreplace")
+
+            buffer += chunk
+            out_stream.write(chunk)
+            out_stream.flush()
+
+            chunk = in_stream.read1()
+
+        return buffer
+
+    @staticmethod
     def _GetBasicEnv():
         """Return a basic env for running git under.
 
@@ -517,6 +579,29 @@
     raised exclusively from non-zero exit codes returned from git commands.
     """
 
+    # Tuples with error formats and suggestions for those errors.
+    _ERROR_TO_SUGGESTION = [
+        (
+            re.compile("couldn't find remote ref .*"),
+            "Check if the provided ref exists in the remote.",
+        ),
+        (
+            re.compile("unable to access '.*': .*"),
+            (
+                "Please make sure you have the correct access rights and the "
+                "repository exists."
+            ),
+        ),
+        (
+            re.compile("'.*' does not appear to be a git repository"),
+            "Are you running this repo command outside of a repo workspace?",
+        ),
+        (
+            re.compile("not a git repository"),
+            "Are you running this repo command outside of a repo workspace?",
+        ),
+    ]
+
     def __init__(
         self,
         message: str = DEFAULT_GIT_FAIL_MESSAGE,
@@ -533,16 +618,37 @@
         self.git_stdout = git_stdout
         self.git_stderr = git_stderr
 
+    @property
+    @functools.lru_cache
+    def suggestion(self):
+        """Returns helpful next steps for the given stderr."""
+        if not self.git_stderr:
+            return self.git_stderr
+
+        for err, suggestion in self._ERROR_TO_SUGGESTION:
+            if err.search(self.git_stderr):
+                return suggestion
+
+        return None
+
     def __str__(self):
         args = "[]" if not self.command_args else " ".join(self.command_args)
         error_type = type(self).__name__
-        return f"""{error_type}: {self.message}
-    Project: {self.project}
-    Args: {args}
-    Stdout:
-{self.git_stdout}
-    Stderr:
-{self.git_stderr}"""
+        string = f"{error_type}: '{args}' on {self.project} failed"
+
+        if self.message != DEFAULT_GIT_FAIL_MESSAGE:
+            string += f": {self.message}"
+
+        if self.git_stdout:
+            string += f"\nstdout: {self.git_stdout}"
+
+        if self.git_stderr:
+            string += f"\nstderr: {self.git_stderr}"
+
+        if self.suggestion:
+            string += f"\nsuggestion: {self.suggestion}"
+
+        return string
 
 
 class GitPopenCommandError(GitError):
diff --git a/project.py b/project.py
index 3acecd6..642c123 100644
--- a/project.py
+++ b/project.py
@@ -1135,7 +1135,7 @@
         url = branch.remote.ReviewUrl(self.UserEmail, validate_certs)
         if url is None:
             raise UploadError("review not configured", project=self.name)
-        cmd = ["push"]
+        cmd = ["push", "--progress"]
         if dryrun:
             cmd.append("-n")
 
diff --git a/tests/test_git_command.py b/tests/test_git_command.py
index c803d28..881cccb 100644
--- a/tests/test_git_command.py
+++ b/tests/test_git_command.py
@@ -14,6 +14,7 @@
 
 """Unittests for the git_command.py module."""
 
+import io
 import os
 import re
 import subprocess
@@ -74,6 +75,10 @@
         class MockPopen(object):
             rc = 0
 
+            def __init__(self):
+                self.stdout = io.BufferedReader(io.BytesIO())
+                self.stderr = io.BufferedReader(io.BytesIO())
+
             def communicate(
                 self, input: str = None, timeout: float = None
             ) -> [str, str]:
@@ -117,6 +122,115 @@
         self.assertEqual(1, r.Wait())
 
 
+class GitCommandStreamLogsTest(unittest.TestCase):
+    """Tests the GitCommand class stderr log streaming cases."""
+
+    def setUp(self):
+        self.mock_process = mock.MagicMock()
+        self.mock_process.communicate.return_value = (None, None)
+        self.mock_process.wait.return_value = 0
+
+        self.mock_popen = mock.MagicMock()
+        self.mock_popen.return_value = self.mock_process
+        mock.patch("subprocess.Popen", self.mock_popen).start()
+
+    def tearDown(self):
+        mock.patch.stopall()
+
+    def test_does_not_stream_logs_when_input_is_set(self):
+        git_command.GitCommand(None, ["status"], input="foo")
+
+        self.mock_popen.assert_called_once_with(
+            ["git", "status"],
+            cwd=None,
+            env=mock.ANY,
+            encoding="utf-8",
+            errors="backslashreplace",
+            stdin=subprocess.PIPE,
+            stdout=None,
+            stderr=None,
+        )
+        self.mock_process.communicate.assert_called_once_with(input="foo")
+        self.mock_process.stderr.read1.assert_not_called()
+
+    def test_does_not_stream_logs_when_stdout_is_set(self):
+        git_command.GitCommand(None, ["status"], capture_stdout=True)
+
+        self.mock_popen.assert_called_once_with(
+            ["git", "status"],
+            cwd=None,
+            env=mock.ANY,
+            encoding="utf-8",
+            errors="backslashreplace",
+            stdin=None,
+            stdout=subprocess.PIPE,
+            stderr=None,
+        )
+        self.mock_process.communicate.assert_called_once_with(input=None)
+        self.mock_process.stderr.read1.assert_not_called()
+
+    def test_does_not_stream_logs_when_stderr_is_set(self):
+        git_command.GitCommand(None, ["status"], capture_stderr=True)
+
+        self.mock_popen.assert_called_once_with(
+            ["git", "status"],
+            cwd=None,
+            env=mock.ANY,
+            encoding="utf-8",
+            errors="backslashreplace",
+            stdin=None,
+            stdout=None,
+            stderr=subprocess.PIPE,
+        )
+        self.mock_process.communicate.assert_called_once_with(input=None)
+        self.mock_process.stderr.read1.assert_not_called()
+
+    def test_does_not_stream_logs_when_merge_output_is_set(self):
+        git_command.GitCommand(None, ["status"], merge_output=True)
+
+        self.mock_popen.assert_called_once_with(
+            ["git", "status"],
+            cwd=None,
+            env=mock.ANY,
+            encoding="utf-8",
+            errors="backslashreplace",
+            stdin=None,
+            stdout=None,
+            stderr=subprocess.STDOUT,
+        )
+        self.mock_process.communicate.assert_called_once_with(input=None)
+        self.mock_process.stderr.read1.assert_not_called()
+
+    @mock.patch("sys.stderr")
+    def test_streams_stderr_when_no_stream_is_set(self, mock_stderr):
+        logs = "\n".join(
+            [
+                "Enumerating objects: 5, done.",
+                "Counting objects: 100% (5/5), done.",
+                "Writing objects: 100% (3/3), 330 bytes | 330 KiB/s, done.",
+                "remote: Processing changes: refs: 1, new: 1, done ",
+                "remote: SUCCESS",
+            ]
+        )
+        self.mock_process.stderr = io.BufferedReader(
+            io.BytesIO(bytes(logs, "utf-8"))
+        )
+
+        cmd = git_command.GitCommand(None, ["push"])
+
+        self.mock_popen.assert_called_once_with(
+            ["git", "push"],
+            cwd=None,
+            env=mock.ANY,
+            stdin=None,
+            stdout=None,
+            stderr=subprocess.PIPE,
+        )
+        self.mock_process.communicate.assert_not_called()
+        mock_stderr.write.assert_called_once_with(logs)
+        self.assertEqual(cmd.stderr, logs)
+
+
 class GitCallUnitTest(unittest.TestCase):
     """Tests the _GitCall class (via git_command.git)."""
 
@@ -214,3 +328,22 @@
         with self.assertRaises(git_command.GitRequireError) as e:
             git_command.git_require((2,), fail=True, msg="so sad")
             self.assertNotEqual(0, e.code)
+
+
+class GitCommandErrorTest(unittest.TestCase):
+    """Test for the GitCommandError class."""
+
+    def test_augument_stderr(self):
+        self.assertEqual(
+            git_command.GitCommandError(
+                git_stderr="couldn't find remote ref refs/heads/foo"
+            ).suggestion,
+            "Check if the provided ref exists in the remote.",
+        )
+
+        self.assertEqual(
+            git_command.GitCommandError(
+                git_stderr="'foobar' does not appear to be a git repository"
+            ).suggestion,
+            "Are you running this repo command outside of a repo workspace?",
+        )