Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.7.rst
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ Optimizations
using the :func:`os.scandir` function.
(Contributed by Serhiy Storchaka in :issue:`25996`.)

* The :func:`shutil.rmtree` function has been sped up to 20--40%.
This was done using the :func:`os.scandir` function.
(Contributed by Serhiy Storchaka in :issue:`28564`.)

* Optimized case-insensitive matching and searching of :mod:`regular
expressions <re>`. Searching some patterns can now be up to 20 times faster.
(Contributed by Serhiy Storchaka in :issue:`30285`.)
Expand Down Expand Up @@ -589,6 +593,11 @@ Changes in the Python API
* ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments
in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.)

* Because :func:`shutil.rmtree` is now implemented using the :func:`os.scandir`
function, the user specified handler *onerror* is now called with the first
argument ``os.scandir`` instead of ``os.listdir`` when listing the direcory
is failed.


Changes in the C API
--------------------
Expand Down
73 changes: 43 additions & 30 deletions Lib/shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,25 +362,27 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
try:
if os.path.islink(path):
# symlinks to directories are forbidden, see bug #1669
raise OSError("Cannot call rmtree on a symbolic link")
with os.scandir(path) as scandir_it:
entries = list(scandir_it)
except OSError:
onerror(os.path.islink, path, sys.exc_info())
# can't continue even if onerror hook returns
return
names = []
try:
names = os.listdir(path)
except OSError:
onerror(os.listdir, path, sys.exc_info())
for name in names:
fullname = os.path.join(path, name)
onerror(os.scandir, path, sys.exc_info())
entries = []
for entry in entries:
fullname = entry.path
try:
mode = os.lstat(fullname).st_mode
is_dir = entry.is_dir(follow_symlinks=False)
except OSError:
mode = 0
if stat.S_ISDIR(mode):
is_dir = False
if is_dir:
try:
if entry.is_symlink():
# This can only happen if someone replaces
# a directory with a symlink after the call to
# os.scandir or entry.is_dir above.
raise OSError("Cannot call rmtree on a symbolic link")
except OSError:
onerror(os.path.islink, fullname, sys.exc_info())
continue
_rmtree_unsafe(fullname, onerror)
else:
try:
Expand All @@ -394,37 +396,40 @@ def _rmtree_unsafe(path, onerror):

# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
names = []
try:
names = os.listdir(topfd)
with os.scandir(topfd) as scandir_it:
entries = list(scandir_it)
except OSError as err:
err.filename = path
onerror(os.listdir, path, sys.exc_info())
for name in names:
fullname = os.path.join(path, name)
onerror(os.scandir, path, sys.exc_info())
return
for entry in entries:
fullname = os.path.join(path, entry.name)
try:
orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
mode = orig_st.st_mode
is_dir = entry.is_dir(follow_symlinks=False)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain why you are doing this is_dir call before re-stat()ing below?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If is_dir is false, we can avoid calling stat(). On Posix scandir() usually fills the is_dir bit, but stat() needs a syscall.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But then why is stat() needed at all? Isn't is_dir() sufficient?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I hadn't noticed the use of orig_st below. My bad.

if is_dir:
orig_st = entry.stat(follow_symlinks=False)
is_dir = stat.S_ISDIR(orig_st.st_mode)
except OSError:
mode = 0
if stat.S_ISDIR(mode):
is_dir = False
if is_dir:
try:
dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
except OSError:
onerror(os.open, fullname, sys.exc_info())
else:
try:
if os.path.samestat(orig_st, os.fstat(dirfd)):
_rmtree_safe_fd(dirfd, fullname, onerror)
try:
os.rmdir(name, dir_fd=topfd)
os.rmdir(entry.name, dir_fd=topfd)
except OSError:
onerror(os.rmdir, fullname, sys.exc_info())
else:
try:
# This can only happen if someone replaces
# a directory with a symlink after the call to
# stat.S_ISDIR above.
# os.scandir or stat.S_ISDIR above.
raise OSError("Cannot call rmtree on a symbolic "
"link")
except OSError:
Expand All @@ -433,13 +438,13 @@ def _rmtree_safe_fd(topfd, path, onerror):
os.close(dirfd)
else:
try:
os.unlink(name, dir_fd=topfd)
os.unlink(entry.name, dir_fd=topfd)
except OSError:
onerror(os.unlink, fullname, sys.exc_info())

_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
os.supports_dir_fd and
os.listdir in os.supports_fd and
os.scandir in os.supports_fd and
os.stat in os.supports_follow_symlinks)

def rmtree(path, ignore_errors=False, onerror=None):
Expand Down Expand Up @@ -491,6 +496,14 @@ def onerror(*args):
finally:
os.close(fd)
else:
try:
if os.path.islink(path):
# symlinks to directories are forbidden, see bug #1669
raise OSError("Cannot call rmtree on a symbolic link")
except OSError:
onerror(os.path.islink, path, sys.exc_info())
# can't continue even if onerror hook returns
return
return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def onerror(*args):
errors.append(args)
shutil.rmtree(filename, onerror=onerror)
self.assertEqual(len(errors), 2)
self.assertIs(errors[0][0], os.listdir)
self.assertIs(errors[0][0], os.scandir)
self.assertEqual(errors[0][1], filename)
self.assertIsInstance(errors[0][2][1], NotADirectoryError)
self.assertIn(errors[0][2][1].filename, possible_args)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The shutil.rmtree() function has been sped up to 20--40%. This was done
using the os.scandir() function.