fix syntax error reporting from stdin (#357) (#716)

stevenkaras · web-flow · commit d875b02835fb · 2022-07-18T22:51:04.000-04:00
Commit b73a3d1 assumed that text is None only when there was an encoding error in the file. However, text is also None on all Python versions before 3.9 when the code is read from stdin. This change reports as much context as possible in that case, so that errors are no longer silently replaced by the unhelpful "problem decoding source" message.
diff --git a/pyflakes/api.py b/pyflakes/api.py
@@ -55,14 +55,7 @@ def check(codeString, filename, reporter=None):
                             text = None
             offset -= 1
 
-        # If there's an encoding problem with the file, the text is None.
-        if text is None:
-            # Avoid using msg, since for the only known case, it contains a
-            # bogus message that claims the encoding the file declared was
-            # unknown.
-            reporter.unexpectedError(filename, 'problem decoding source')
-        else:
-            reporter.syntaxError(filename, msg, lineno, offset, text)
+        reporter.syntaxError(filename, msg, lineno, offset, text)
         return 1
     except Exception:
         reporter.unexpectedError(filename, 'problem decoding source')
diff --git a/pyflakes/reporter.py b/pyflakes/reporter.py
@@ -51,19 +51,30 @@ def syntaxError(self, filename, msg, lineno, offset, text):
         @param text: The source code containing the syntax error.
         @ptype text: C{unicode}
         """
-        line = text.splitlines()[-1]
+        if text is None:
+            line = None
+        else:
+            line = text.splitlines()[-1]
+
+        # lineno might be 0 if the error came from stdin
+        lineno = max(lineno, 1)
+
         if offset is not None:
-            if sys.version_info < (3, 8):
+            if sys.version_info < (3, 8) and text is not None:
                 offset = offset - (len(text) - len(line)) + 1
+            # some versions of python emit an offset of -1 for certain encoding errors
+            offset = max(offset, 1)
             self._stderr.write('%s:%d:%d: %s\n' %
                                (filename, lineno, offset, msg))
         else:
             self._stderr.write('%s:%d: %s\n' % (filename, lineno, msg))
-        self._stderr.write(line)
-        self._stderr.write('\n')
-        if offset is not None:
-            self._stderr.write(re.sub(r'\S', ' ', line[:offset - 1]) +
-                               "^\n")
+
+        if line is not None:
+            self._stderr.write(line)
+            self._stderr.write('\n')
+            if offset is not None:
+                self._stderr.write(re.sub(r'\S', ' ', line[:offset - 1]) +
+                                   "^\n")
 
     def flake(self, message):
         """
diff --git a/pyflakes/test/test_api.py b/pyflakes/test/test_api.py
@@ -15,6 +15,7 @@
 from pyflakes.reporter import Reporter
 from pyflakes.api import (
     main,
+    check,
     checkPath,
     checkRecursive,
     iterSourceCode,
@@ -255,6 +256,17 @@ def test_syntaxErrorNoOffset(self):
              "bad line of source\n"),
             err.getvalue())
 
+    def test_syntaxErrorNoText(self):
+        """
+        C{syntaxError} doesn't include text or nonsensical offsets if C{text} is C{None}.
+
+        This typically happens when reporting syntax errors from stdin.
+        """
+        err = io.StringIO()
+        reporter = Reporter(None, err)
+        reporter.syntaxError('<stdin>', 'a problem', 0, 0, None)
+        self.assertEqual(("<stdin>:1:1: a problem\n"), err.getvalue())
+
     def test_multiLineSyntaxError(self):
         """
         If there's a multi-line syntax error, then we only report the last
@@ -606,7 +618,8 @@ def test_misencodedFileUTF8(self):
 """ % SNOWMAN).encode('utf-8')
         with self.makeTempFile(source) as sourcePath:
             self.assertHasErrors(
-                sourcePath, [f"{sourcePath}: problem decoding source\n"])
+                sourcePath,
+                [f"{sourcePath}:1:1: 'ascii' codec can't decode byte 0xe2 in position 21: ordinal not in range(128)\n"])  # noqa: E501
 
     def test_misencodedFileUTF16(self):
         """
@@ -648,6 +661,43 @@ def test_checkRecursive(self):
         finally:
             shutil.rmtree(tempdir)
 
+    def test_stdinReportsErrors(self):
+        """
+        L{check} reports syntax errors from stdin
+        """
+        source = "max(1 for i in range(10), key=lambda x: x+1)\n"
+        err = io.StringIO()
+        count = withStderrTo(err, check, source, "<stdin>")
+        self.assertEqual(count, 1)
+        errlines = err.getvalue().split("\n")[:-1]
+
+        if PYPY:
+            expected_error = [
+                "<stdin>:1:3: Generator expression must be parenthesized if not sole argument",  # noqa: E501
+                "max(1 for i in range(10), key=lambda x: x+1)",
+                "  ^",
+            ]
+        elif sys.version_info >= (3, 9):
+            expected_error = [
+                "<stdin>:1:5: Generator expression must be parenthesized",
+                "max(1 for i in range(10), key=lambda x: x+1)",
+                "    ^",
+            ]
+        elif sys.version_info >= (3, 8):
+            expected_error = [
+                "<stdin>:1:5: Generator expression must be parenthesized",
+            ]
+        elif sys.version_info >= (3, 7):
+            expected_error = [
+                "<stdin>:1:4: Generator expression must be parenthesized",
+            ]
+        elif sys.version_info >= (3, 6):
+            expected_error = [
+                "<stdin>:1:4: Generator expression must be parenthesized if not sole argument",  # noqa: E501
+            ]
+
+        self.assertEqual(errlines, expected_error)
+
 
 class IntegrationTests(TestCase):
     """