Skip to content

Commit 3983281

Browse files
authored
Merge branch 'main' into sem-macosx-multiprocessing-module-C
2 parents 8af3be7 + f0daba1 commit 3983281

4 files changed

Lines changed: 19 additions & 63 deletions

File tree

Include/cpython/bytesobject.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
typedef struct {
66
PyObject_VAR_HEAD
77
Py_DEPRECATED(3.11) Py_hash_t ob_shash;
8-
unsigned char ob_sval[1];
8+
char ob_sval[1];
99

1010
/* Invariants:
1111
* ob_sval contains space for 'ob_size+1' elements.
@@ -20,7 +20,7 @@ PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
2020
#define _PyBytes_CAST(op) \
2121
(assert(PyBytes_Check(op)), _Py_CAST(PyBytesObject*, op))
2222

23-
static inline unsigned char* PyBytes_AS_STRING(PyObject *op)
23+
static inline char* PyBytes_AS_STRING(PyObject *op)
2424
{
2525
return _PyBytes_CAST(op)->ob_sval;
2626
}

Lib/test/test_robotparser.py

Lines changed: 16 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -646,23 +646,26 @@ def test_group_without_user_agent(self):
646646
)
647647
class BaseLocalNetworkTestCase:
648648

649-
@classmethod
650-
def setUpClass(cls):
649+
def setUp(self):
651650
# clear _opener global variable
652-
cls.addClassCleanup(urllib.request.urlcleanup)
651+
self.addCleanup(urllib.request.urlcleanup)
653652

654-
cls.server = HTTPServer((socket_helper.HOST, 0), cls.RobotHandler)
655-
cls.addClassCleanup(cls.server.server_close)
653+
self.server = HTTPServer((socket_helper.HOST, 0), self.RobotHandler)
656654

657-
t = threading.Thread(
655+
self.t = threading.Thread(
658656
name='HTTPServer serving',
659-
target=cls.server.serve_forever,
657+
target=self.server.serve_forever,
660658
# Short poll interval to make the test finish quickly.
661659
# Time between requests is short enough that we won't wake
662660
# up spuriously too many times.
663661
kwargs={'poll_interval':0.01})
664-
cls.enterClassContext(threading_helper.start_threads([t]))
665-
cls.addClassCleanup(cls.server.shutdown)
662+
self.t.daemon = True # In case this function raises.
663+
self.t.start()
664+
665+
def tearDown(self):
666+
self.server.shutdown()
667+
self.t.join()
668+
self.server.server_close()
666669

667670

668671
SAMPLE_ROBOTS_TXT = b'''\
@@ -684,6 +687,7 @@ def do_GET(self):
684687
def log_message(self, format, *args):
685688
pass
686689

690+
@threading_helper.reap_threads
687691
def testRead(self):
688692
# Test that reading a weird robots.txt doesn't fail.
689693
addr = self.server.server_address
@@ -705,62 +709,24 @@ def testRead(self):
705709
self.assertFalse(parser.can_fetch(agent, url + '/%2F[spam]/path'))
706710

707711

708-
class HttpErrorsTestCase(BaseLocalNetworkTestCase, unittest.TestCase):
712+
class PasswordProtectedSiteTestCase(BaseLocalNetworkTestCase, unittest.TestCase):
709713
class RobotHandler(BaseHTTPRequestHandler):
710714

711715
def do_GET(self):
712-
self.send_error(self.server.return_code)
716+
self.send_error(403, "Forbidden access")
713717

714718
def log_message(self, format, *args):
715719
pass
716720

717-
def setUp(self):
718-
# Make sure that a valid code is set in the test.
719-
self.server.return_code = None
720-
721+
@threading_helper.reap_threads
721722
def testPasswordProtectedSite(self):
722-
self.server.return_code = 403
723723
addr = self.server.server_address
724724
url = 'http://' + socket_helper.HOST + ':' + str(addr[1])
725725
robots_url = url + "/robots.txt"
726726
parser = urllib.robotparser.RobotFileParser()
727727
parser.set_url(url)
728728
parser.read()
729729
self.assertFalse(parser.can_fetch("*", robots_url))
730-
self.assertFalse(parser.can_fetch("*", url + '/some/file.html'))
731-
732-
def testNotFound(self):
733-
self.server.return_code = 404
734-
addr = self.server.server_address
735-
url = f'http://{socket_helper.HOST}:{addr[1]}'
736-
robots_url = url + "/robots.txt"
737-
parser = urllib.robotparser.RobotFileParser()
738-
parser.set_url(url)
739-
parser.read()
740-
self.assertTrue(parser.can_fetch("*", robots_url))
741-
self.assertTrue(parser.can_fetch("*", url + '/path/file.html'))
742-
743-
def testTeapot(self):
744-
self.server.return_code = 418
745-
addr = self.server.server_address
746-
url = f'http://{socket_helper.HOST}:{addr[1]}'
747-
robots_url = url + "/robots.txt"
748-
parser = urllib.robotparser.RobotFileParser()
749-
parser.set_url(url)
750-
parser.read()
751-
self.assertTrue(parser.can_fetch("*", robots_url))
752-
self.assertTrue(parser.can_fetch("*", url + '/pot-1?milk-type=Cream'))
753-
754-
def testServiceUnavailable(self):
755-
self.server.return_code = 503
756-
addr = self.server.server_address
757-
url = f'http://{socket_helper.HOST}:{addr[1]}'
758-
robots_url = url + "/robots.txt"
759-
parser = urllib.robotparser.RobotFileParser()
760-
parser.set_url(url)
761-
parser.read()
762-
self.assertFalse(parser.can_fetch("*", robots_url))
763-
self.assertFalse(parser.can_fetch("*", url + '/path/file.html'))
764730

765731

766732
@support.requires_working_socket()

Lib/urllib/robotparser.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -65,17 +65,9 @@ def read(self):
6565
f = urllib.request.urlopen(self.url)
6666
except urllib.error.HTTPError as err:
6767
if err.code in (401, 403):
68-
# If access to robot.txt has the status Unauthorized/Forbidden,
69-
# then most likely this applies to the entire site.
7068
self.disallow_all = True
71-
elif 400 <= err.code < 500:
72-
# RFC 9309, Section 2.3.1.3: the crawler MAY access any
73-
# resources on the server.
69+
elif err.code >= 400 and err.code < 500:
7470
self.allow_all = True
75-
elif 500 <= err.code < 600:
76-
# RFC 9309, Section 2.3.1.4: the crawler MUST assume
77-
# complete disallow.
78-
self.disallow_all = True
7971
err.close()
8072
else:
8173
raw = f.read()

Misc/NEWS.d/next/Library/2025-09-05-20-50-35.gh-issue-79638.Y-JfaH.rst

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments (0)