-
-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy path__init__.py
More file actions
210 lines (169 loc) · 5.26 KB
/
__init__.py
File metadata and controls
210 lines (169 loc) · 5.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/extractcode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
import os
import posixpath
import re
import shutil
import sys
from os.path import dirname
from os.path import join
from os.path import exists
from commoncode.fileutils import as_posixpath
from commoncode.fileutils import create_dir
from commoncode.fileutils import file_name
from commoncode.fileutils import parent_directory
from commoncode.text import toascii
from commoncode.system import on_linux
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Set to True to send DEBUG-level logging for this module to stdout.
TRACE = False

if TRACE:
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    logger.setLevel(logging.DEBUG)
# Suffix appended to a path to build the path of its extraction directory
EXTRACT_SUFFIX = '-extract'

# High level archive "kinds", each identified by a small integer code
docs = 1
regular = 2
regular_nested = 3
package = 4
file_system = 5
patches = 6
special_package = 7

# Human-readable label for each kind code
kind_labels = {
    docs: 'docs',
    regular: 'regular',
    regular_nested: 'regular_nested',
    package: 'package',
    file_system: 'file_system',
    patches: 'patches',
    special_package: 'special_package',
}

# note: all_kinds lists every kind, special_package included; only
# default_kinds below leaves special_package (and docs, file_system and
# patches) out.
all_kinds = (
    regular,
    regular_nested,
    package,
    file_system,
    docs,
    patches,
    special_package,
)

# Kinds selected by the 'default' extract type
default_kinds = (
    regular,
    regular_nested,
    package,
)

# Map each user-visible extract type name to a tuple of "kinds"
extract_types = {
    'default': default_kinds,
    'all': all_kinds,
    'package': (package,),
    'filesystem': (file_system,),
    'doc': (docs,),
    'patch': (patches,),
    'special_package': (special_package,),
}
def is_extraction_path(path):
    """
    Return True if the `path` string, ignoring any trailing slash or
    backslash, ends with the extraction suffix (EXTRACT_SUFFIX).

    A falsy `path` (such as None or an empty string) is returned unchanged
    rather than converted to False.
    """
    if not path:
        return path
    trimmed = path.rstrip('\\/')
    return trimmed.endswith(EXTRACT_SUFFIX)
def is_extracted(location):
    """
    Return True if the extraction path computed for `location` by
    get_extraction_path() exists on disk.

    A falsy `location` (such as None or an empty string) is returned
    unchanged rather than converted to False.
    """
    if not location:
        return location
    target = get_extraction_path(location)
    return exists(target)
def get_extraction_path(path):
    """
    Return the path where to extract `path`: the `path` string stripped of
    any trailing slash or backslash, with EXTRACT_SUFFIX appended.
    """
    trimmed = path.rstrip('\\/')
    return f'{trimmed}{EXTRACT_SUFFIX}'
def remove_archive_suffix(path):
    """
    Return `path` with every occurrence of the extraction suffix
    (EXTRACT_SUFFIX) removed.

    Occurrences are removed wherever they appear in the string, not only at
    the end of the path or of a path segment.
    """
    # Escape the suffix so it is always matched as literal text, even if
    # EXTRACT_SUFFIX ever contains regular expression metacharacters.
    return re.sub(re.escape(EXTRACT_SUFFIX), '', path)
def remove_backslashes_and_dotdots(directory):
    """
    Walk `directory` and move each file whose name contains a backslash or a
    ".." sequence to a cleaned-up relative path under the directory where
    the file was found, creating intermediate directories as needed.

    Return a list of the original paths of the files for which this failed.
    The list is empty if there was no error.
    """
    failed = []
    for top, _dirs, files in os.walk(directory):
        for filename in files:
            if '\\' not in filename and '..' not in filename:
                # nothing to clean up in this name
                continue

            source = join(top, filename)
            try:
                # NOTE(review): as_posixpath is assumed to turn backslashes
                # into forward slashes -- confirm against commoncode.
                cleaned = as_posixpath(filename).strip('/')
                cleaned = posixpath.normpath(cleaned)
                # any remaining ".." becomes a segment separator
                cleaned = cleaned.replace('..', '/').strip('/')
                segments = posixpath.normpath(cleaned).split('/')

                # every segment but the last one is a directory to create
                create_dir(join(top, *segments[:-1]))
                shutil.move(source, join(top, *segments))
            except Exception:
                # best effort: record the failing path and keep going
                failed.append(source)
    return failed
def new_name(location, is_dir=False):
    """
    Return a new location derived from `location` whose file name does not
    clash with any existing entry of the same parent directory, so that a
    file or directory can be created there without overwriting anything.

    Names are compared in lower case so that the result is the same on case
    sensitive (*nix) and case insensitive file systems.

    If the name is already taken, a unique one is built this way:
     * for a directory (`is_dir` is true), append _X to the whole name,
     * for a file, append _X to the part of the name before its first dot
       and keep everything from that dot onwards unchanged,
    where X is a counter starting at 1 and incremented until the name is
    unique.

    A "." or ".." file name is replaced by "_" before checking for clashes.
    """
    assert location
    location = location.rstrip('\\/')
    assert location

    parent = parent_directory(location)

    # lower-cased names of everything that already exists in the parent
    taken = {entry.lower() for entry in os.listdir(parent)}

    filename = file_name(location)

    # corner case: these names cannot be used as-is
    if filename in ('.', '..'):
        filename = '_'

    if filename.lower() in taken:
        # The name clashes: split it in a base name and an extension.
        # Directories do not have an "extension".
        base_name, ext = filename, ''
        if not is_dir:
            base_name, dot, ext = filename.partition('.')
            if dot:
                ext = f'.{ext}'

        # bump the counter until the padded name is unique
        counter = 1
        filename = f'{base_name}_{counter}{ext}'
        while filename.lower() in taken:
            counter += 1
            filename = f'{base_name}_{counter}{ext}'

    return join(parent, filename)
class ExtractError(Exception):
    """
    Base class of the extraction exceptions defined in this module.
    """
class ExtractErrorPasswordProtected(ExtractError):
    """
    ExtractError subclass. Going by its name, it reports an archive that is
    password protected; where it is raised is not visible in this module.
    """
class ExtractErrorFailedToExtract(ExtractError):
    """
    ExtractError subclass. Going by its name, it reports an extraction that
    failed; where it is raised is not visible in this module.
    """
class ExtractWarningIncorrectEntry(ExtractError):
    """
    ExtractError subclass. Although named as a warning, this is an exception
    class; going by its name, it reports an incorrect archive entry.
    NOTE(review): whether it is treated as non-fatal is up to the callers,
    which are not visible in this module -- confirm.
    """
class ExtractWarningTrailingGarbage(ExtractError):
    """
    ExtractError subclass. Although named as a warning, this is an exception
    class; going by its name, it reports trailing garbage in an archive.
    NOTE(review): whether it is treated as non-fatal is up to the callers,
    which are not visible in this module -- confirm.
    """