1
2
3
4
5
6
"""
Various functions used by other modules.

@var invalid_chars_in_filename: a mix of characters that are not permitted in
    filenames by the most commonly used filesystems
@var invalid_windows_name: a list of names that are not allowed as filenames
    under Windows
"""
12
13 import sys
14
# Control characters 0x00-0x1f plus the punctuation rejected by common
# filesystems. The backslash before '%' is written as an explicit '\\' so the
# literal no longer relies on an invalid escape sequence; the value is
# unchanged.
invalid_chars_in_filename = (
    b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
    b'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    b'<>:"/\\|?*\\%\''
)

# Names that cannot be used as filenames under Windows (compared upper-cased).
invalid_windows_name = [
    b'CON', b'PRN', b'AUX', b'NUL',
    b'COM1', b'COM2', b'COM3', b'COM4', b'COM5',
    b'COM6', b'COM7', b'COM8', b'COM9',
    b'LPT1', b'LPT2', b'LPT3', b'LPT4', b'LPT5',
    b'LPT6', b'LPT7', b'LPT8', b'LPT9',
]


def sanitize_filename(filename, alt_name, alt_ext):
    """
    Convert the given filename into a name that should work on all
    platforms. Remove non us-ascii characters and the characters listed in
    C{invalid_chars_in_filename}, and rename invalid Windows names.
    Use the I{alternative} filename if needed.

    @type filename: unicode or None
    @param filename: the original filename or None. Can be unicode. A byte
        string is used as is, without removing non us-ascii bytes.
    @type alt_name: str
    @param alt_name: the alternative filename if filename is None or useless
    @type alt_ext: str
    @param alt_ext: the alternative filename extension (including the '.')

    @rtype: str
    @returns: a valid filename.

    >>> sanitize_filename('document.txt', 'file', '.txt')
    'document.txt'
    >>> sanitize_filename('number1.txt', 'file', '.txt')
    'number1.txt'
    >>> sanitize_filename(None, 'file', '.txt')
    'file.txt'
    >>> sanitize_filename(u'R\\xe9pertoir.txt', 'file', '.txt')
    'Rpertoir.txt'
    >>> # the '\\xe9' has been removed
    >>> sanitize_filename(u'\\xe9\\xe6.html', 'file', '.txt')
    'file.html'
    >>> # all non us-ascii characters have been removed, the alternative name
    >>> # has been used to replace the empty string. The original extension
    >>> # is still valid
    >>> sanitize_filename(u'\\xe9\\xe6', 'file', '.txt')
    'file.txt'
    >>> # nothing usable is left, the alternative name and extension are used
    >>> sanitize_filename(u'COM1.txt', 'file', '.txt')
    'COM1A.txt'
    >>> # if the name matches an invalid name or assimilated then a A is added
    """
    if not filename:
        return alt_name + alt_ext

    # type(u'') is `unicode` on Python 2 and `str` on Python 3: text is
    # reduced to its us-ascii characters and handled as bytes from here on.
    if isinstance(filename, type(u'')):
        filename = filename.encode('ascii', 'ignore')

    filename = filename.translate(None, invalid_chars_in_filename)
    filename = filename.strip()

    if not filename:
        # Every character was dropped: fall back to the alternative name
        # instead of returning an empty filename.
        return alt_name + alt_ext

    upper = filename.upper()
    for name in invalid_windows_name:
        if upper == name or upper.startswith(name + b'.'):
            # Insert an 'A' right after the reserved name, before any
            # extension ('COM1.txt' -> 'COM1A.txt', 'CON' -> 'CONA').
            filename = filename[:len(name)] + b'A' + filename[len(name):]
            break

    if not isinstance(filename, str):
        # Python 3: convert the bytes back to the native str type.
        filename = filename.decode('us-ascii')

    if filename.rfind('.') == 0:
        # Only an extension is left ('.html'): prepend the alternative name.
        filename = alt_name + filename

    return filename
86
def handle_filename_collision(filename, filenames):
    """
    Avoid filename collision, add a sequence number to the name when required.
    'file.txt' will be renamed into 'file-01.txt' then 'file-02.txt' ...
    until there is no more collision. The file is not added to the list.

    Windows doesn't make the difference between lower and upper case. To avoid
    "case" collision, the function compares C{filename.lower()} to the list.
    If you provide a list in lower case only, then any collisions will be
    avoided.

    A name whose only dots are leading ones ('.bashrc') is considered to have
    no extension, the sequence number is appended at the end of the name.

    @type filename: str
    @param filename: the filename
    @type filenames: list or set
    @param filenames: a list of filenames.

    @rtype: str
    @returns: the I{filename} or the appropriately I{indexed} I{filename}

    >>> handle_filename_collision('file.txt', [ ])
    'file.txt'
    >>> handle_filename_collision('file.txt', [ 'file.txt' ])
    'file-01.txt'
    >>> handle_filename_collision('file.txt', [ 'file.txt', 'file-01.txt',])
    'file-02.txt'
    >>> handle_filename_collision('foo', [ 'foo',])
    'foo-01'
    >>> handle_filename_collision('foo', [ 'foo', 'foo-01',])
    'foo-02'
    >>> handle_filename_collision('FOO', [ 'foo', 'foo-01',])
    'FOO-02'
    >>> handle_filename_collision('.bashrc', [ '.bashrc',])
    '.bashrc-01'
    """
    if filename.lower() not in filenames:
        return filename

    basename, dot, ext = filename.rpartition('.')
    if dot and basename.strip('.'):
        ext = dot + ext
    else:
        # No dot at all, or nothing but dots before the last one: the whole
        # name is the base name and there is no extension.
        basename, ext = filename, ''

    index = 1
    while True:
        candidate = '%s-%02d%s' % (basename, index, ext)
        if candidate.lower() not in filenames:
            return candidate
        index += 1
133
def is_usascii(value):
    """
    Test if a string contains us-ascii characters only.

    @type value: str, unicode or bytes
    @param value: the string to check

    @rtype: bool
    @returns: True if I{value} only contains us-ascii characters

    >>> is_usascii('foo')
    True
    >>> is_usascii(u'foo')
    True
    >>> is_usascii(u'Fran\\xe7ais')
    False
    >>> is_usascii('bad\\x81')
    False
    >>> is_usascii(b'foo')
    True
    """
    try:
        if isinstance(value, bytes) and not isinstance(value, str):
            # Python 3 bytes have no encode() method: check them by decoding.
            # On Python 2 bytes is str, so byte strings take the other branch.
            value.decode('us-ascii')
        else:
            value.encode('us-ascii')
    except UnicodeError:
        return False

    return True
155