1 | # Test some Unicode file name semantics
|
---|
2 | # We don't test many operations on files other than
|
---|
3 | # that their names can be used with Unicode characters.
|
---|
4 | import os, glob, time, shutil
|
---|
5 | import unicodedata
|
---|
6 |
|
---|
7 | import unittest
|
---|
8 | from test.test_support import run_unittest, TESTFN_UNICODE
|
---|
9 | from test.test_support import TESTFN_ENCODING, TESTFN_UNENCODABLE
|
---|
# Work out a test file name that exists both as a Unicode string
# (TESTFN_UNICODE) and as a byte string in TESTFN_ENCODING (TESTFN_ENCODED),
# and that survives an encode/decode round trip.  If no such name can be
# found, the whole module is skipped by raising unittest.SkipTest.
try:
    TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
except (UnicodeError, TypeError):
    # Either the file system encoding is None, or the file name
    # cannot be encoded in the file system encoding.
    raise unittest.SkipTest("No Unicode filesystem semantics on this platform.")

if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    # The file system encoding does not support Latin-1
    # (which test_support assumes), so try the file system
    # encoding instead.
    import sys
    try:
        TESTFN_UNICODE = unicode("@test-\xe0\xf2", sys.getfilesystemencoding())
        TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
        if '?' in TESTFN_ENCODED:
            # MBCS will not report the error properly, so a '?' in the
            # encoded name is treated as an encoding failure.
            raise UnicodeError("mbcs encoding problem")
    except (UnicodeError, TypeError):
        raise unittest.SkipTest("Cannot find a suitable filename.")

# Final sanity check: the name must still round-trip after the fallback.
if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    raise unittest.SkipTest("Cannot find a suitable filename.")
33 |
|
---|
def remove_if_exists(filename):
    """Unlink *filename* if os.path.exists() reports it; otherwise do nothing."""
    if not os.path.exists(filename):
        return
    os.unlink(filename)
37 |
|
---|
class TestUnicodeFiles(unittest.TestCase):
    """Exercise file and directory operations with encoded and Unicode names."""

    # The '_do_' functions are the actual tests.  They generally assume the
    # file already exists etc.

    # Do all the tests we can given only a single filename.  The file should
    # exist.
    def _do_single(self, filename):
        self.assertTrue(os.path.exists(filename))
        self.assertTrue(os.path.isfile(filename))
        self.assertTrue(os.access(filename, os.R_OK))
        self.assertTrue(os.path.exists(os.path.abspath(filename)))
        self.assertTrue(os.path.isfile(os.path.abspath(filename)))
        self.assertTrue(os.access(os.path.abspath(filename), os.R_OK))
        # '0o777' is the octal spelling accepted from Python 2.6 onwards.
        os.chmod(filename, 0o777)
        os.utime(filename, None)
        os.utime(filename, (time.time(), time.time()))
        # Copy/rename etc tests using the same filename
        self._do_copyish(filename, filename)
        # Filename should appear in glob output
        self.assertEqual(os.path.abspath(filename),
                         os.path.abspath(glob.glob(filename)[0]))
        # basename should appear in listdir.
        path, base = os.path.split(os.path.abspath(filename))
        if isinstance(base, str):
            base = base.decode(TESTFN_ENCODING)
        file_list = os.listdir(path)
        # listdir() with a unicode arg may or may not return Unicode
        # objects, depending on the platform.
        if file_list and isinstance(file_list[0], str):
            file_list = [f.decode(TESTFN_ENCODING) for f in file_list]

        # Normalize the unicode strings, as round-tripping the name via the OS
        # may return a different (but equivalent) value.
        base = unicodedata.normalize("NFD", base)
        file_list = [unicodedata.normalize("NFD", f) for f in file_list]

        self.assertIn(base, file_list)

    # Do as many 'equivalency' tests as we can - ie, check that although we
    # have different types for the filename, they refer to the same file.
    def _do_equivalent(self, filename1, filename2):
        # Note we only check "filename1 against filename2" - we don't bother
        # checking "filename2 against 1", as we assume we are called again with
        # the args reversed.
        self.assertNotEqual(type(filename1), type(filename2),
                            "No point checking equivalent filenames of the same type")
        # stat and lstat should return the same results.
        self.assertEqual(os.stat(filename1),
                         os.stat(filename2))
        self.assertEqual(os.lstat(filename1),
                         os.lstat(filename2))
        # Copy/rename etc tests using equivalent filename
        self._do_copyish(filename1, filename2)

    # Tests that copy, move, etc one file to another.  On return the file
    # is back under its original name and the temporary ".new" name is gone.
    def _do_copyish(self, filename1, filename2):
        # Should be able to rename the file using either name.
        self.assertTrue(os.path.isfile(filename1))  # must exist.
        os.rename(filename1, filename2 + ".new")
        self.assertTrue(os.path.isfile(filename1 + ".new"))
        os.rename(filename1 + ".new", filename2)
        self.assertTrue(os.path.isfile(filename2))

        shutil.copy(filename1, filename2 + ".new")
        os.unlink(filename1 + ".new")  # remove using equiv name.
        # And a couple of moves, one using each name.
        shutil.move(filename1, filename2 + ".new")
        self.assertFalse(os.path.exists(filename2))
        shutil.move(filename1 + ".new", filename2)
        self.assertTrue(os.path.exists(filename1))
        # Note - due to the implementation of shutil.move,
        # it tries a rename first.  This only fails on Windows when on
        # different file systems - and this test can't ensure that.
        # So we test the shutil.copy2 function, which is the thing most
        # likely to fail.
        shutil.copy2(filename1, filename2 + ".new")
        os.unlink(filename1 + ".new")

    # Create a directory as 'make_name', enter it as 'chdir_name', and check
    # that the last component of the current directory matches 'make_name'.
    # 'encoded' selects how the comparison is done: when true, os.getcwd()
    # and 'make_name' are decoded with TESTFN_ENCODING first; when false,
    # os.getcwdu() is compared against 'make_name' as given.
    def _do_directory(self, make_name, chdir_name, encoded):
        cwd = os.getcwd()
        if os.path.isdir(make_name):
            os.rmdir(make_name)
        os.mkdir(make_name)
        try:
            os.chdir(chdir_name)
            try:
                if not encoded:
                    cwd_result = os.getcwdu()
                    name_result = make_name
                else:
                    cwd_result = os.getcwd().decode(TESTFN_ENCODING)
                    name_result = make_name.decode(TESTFN_ENCODING)

                # Normalize both sides so that equivalent spellings of the
                # same name compare equal.
                cwd_result = unicodedata.normalize("NFD", cwd_result)
                name_result = unicodedata.normalize("NFD", name_result)

                self.assertEqual(os.path.basename(cwd_result), name_result)
            finally:
                # Always go back, otherwise the rmdir below would be
                # attempted from inside the directory being removed.
                os.chdir(cwd)
        finally:
            os.rmdir(make_name)

    # The '_test' functions are 'entry points with params' - ie, what the
    # top-level 'test' functions would be if they could take params
    def _test_single(self, filename):
        remove_if_exists(filename)
        # Create an empty file through the builtin open().
        with open(filename, "w"):
            pass
        try:
            self._do_single(filename)
        finally:
            os.unlink(filename)
        self.assertFalse(os.path.exists(filename))
        # and again with os.open.
        fd = os.open(filename, os.O_CREAT)
        os.close(fd)
        try:
            self._do_single(filename)
        finally:
            os.unlink(filename)

    def _test_equivalent(self, filename1, filename2):
        remove_if_exists(filename1)
        self.assertFalse(os.path.exists(filename2))
        # Create the file under the first name only; the second name is
        # expected to refer to the same file.
        with open(filename1, "w"):
            pass
        try:
            self._do_equivalent(filename1, filename2)
        finally:
            os.unlink(filename1)

    # The 'test' functions are unittest entry points, and simply call our
    # _test functions with each of the filename combinations we wish to test
    def test_single_files(self):
        self._test_single(TESTFN_ENCODED)
        self._test_single(TESTFN_UNICODE)
        if TESTFN_UNENCODABLE is not None:
            self._test_single(TESTFN_UNENCODABLE)

    def test_equivalent_files(self):
        self._test_equivalent(TESTFN_ENCODED, TESTFN_UNICODE)
        self._test_equivalent(TESTFN_UNICODE, TESTFN_ENCODED)

    def test_directories(self):
        # For all 'equivalent' combinations:
        #  Make dir with encoded, chdir with unicode, checkdir with encoded
        #  (or unicode/encoded/unicode, etc.)
        ext = ".dir"
        self._do_directory(TESTFN_ENCODED + ext, TESTFN_ENCODED + ext, True)
        self._do_directory(TESTFN_ENCODED + ext, TESTFN_UNICODE + ext, True)
        self._do_directory(TESTFN_UNICODE + ext, TESTFN_ENCODED + ext, False)
        self._do_directory(TESTFN_UNICODE + ext, TESTFN_UNICODE + ext, False)
        # Our directory name that can't use a non-unicode name.
        if TESTFN_UNENCODABLE is not None:
            self._do_directory(TESTFN_UNENCODABLE + ext,
                               TESTFN_UNENCODABLE + ext,
                               False)
195 |
|
---|
def test_main():
    """Pass this module's name to test_support's run_unittest()."""
    run_unittest(__name__)
198 |
|
---|
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()