blob: f3862066c4edc03cd2e005af7ee2ffb64633e715 [file] [log] [blame]
Michael Thvedt828a4be2015-08-12 17:45:36 +00001#!/usr/bin/python2.7
2
Damien Martin-Guillerezf88f4d82015-09-25 13:56:55 +00003# Copyright 2015 The Bazel Authors. All rights reserved.
Michael Thvedt828a4be2015-08-12 17:45:36 +00004#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""A script for J2ObjC dead code removal in Blaze.
18
19This script removes unused J2ObjC-translated classes from compilation and
20linking by:
21 1. Build a class dependency tree among translated source files.
22 2. Use user-provided Java class entry points to get a list of reachable
23 classes.
24 3. Go through all translated source files and rewrite unreachable ones with
25 dummy content.
26"""
27
28import argparse
29from collections import OrderedDict
30import multiprocessing
31import os
diamondm2732b172018-05-24 12:58:37 -070032import pipes # swap to shlex once on Python 3
Michael Thvedt828a4be2015-08-12 17:45:36 +000033import Queue
Chris Parsons87846c72016-05-05 20:24:13 +000034import re
Michael Thvedt828a4be2015-08-12 17:45:36 +000035import shutil
Rumou Duan123e1c32016-02-01 16:16:15 +000036import subprocess
Michael Thvedt828a4be2015-08-12 17:45:36 +000037import threading
38
# Placeholder body written in place of every pruned (unreachable) translated
# source file. It declares a static variable scoped to the source file so the
# resulting object file is not empty, which suppresses the "has no symbols"
# linker warning.
PRUNED_SRC_CONTENT = 'static int DUMMY_unused __attribute__((unused,used)) = 0;'
40
41
def BuildReachabilityTree(dependency_mapping_files, file_open=open):
  """Builds a reachability tree using entries from dependency mapping files.

  Every non-blank line of a mapping file is expected to look like
  ``entry:dep``. Only the first two colon-separated fields of a line are used;
  surrounding whitespace is stripped and blank lines are skipped.

  Args:
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    A dict mapping J2ObjC-generated source files to the corresponding direct
    dependent source files (a list, in the order the entries were read).
  Raises:
    IndexError: If a non-blank line does not contain a ':' separator.
  """
  tree = dict()
  for dependency_mapping_file in dependency_mapping_files.split(','):
    with file_open(dependency_mapping_file, 'r') as f:
      for line in f:
        stripped = line.strip()
        if not stripped:
          # Tolerate blank lines (e.g. a trailing empty line) instead of
          # failing with an IndexError while indexing the split result.
          continue
        fields = stripped.split(':')
        tree.setdefault(fields[0], []).append(fields[1])
  return tree
65
66
def BuildHeaderMapping(header_mapping_files, file_open=open):
  """Builds a mapping between Java classes and J2ObjC-translated header files.

  Every non-blank line of a mapping file is expected to look like
  ``java.class.Name=path/to/Header.h``. The extension of the header path is
  dropped so the value names the translated file without a suffix. When a
  class appears more than once, the last occurrence wins.

  Args:
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    An ordered dict mapping Java class names to corresponding J2ObjC-translated
    source files.
  Raises:
    IndexError: If a non-blank line does not contain a '=' separator.
  """
  header_mapping = OrderedDict()
  for header_mapping_file in header_mapping_files.split(','):
    with file_open(header_mapping_file, 'r') as f:
      for line in f:
        stripped = line.strip()
        if not stripped:
          # Tolerate blank lines (e.g. a trailing empty line) instead of
          # failing with an IndexError while indexing the split result.
          continue
        fields = stripped.split('=')
        java_class_name = fields[0]
        transpiled_file_name = os.path.splitext(fields[1])[0]
        header_mapping[java_class_name] = transpiled_file_name
  return header_mapping
87
88
def BuildReachableFileSet(entry_classes, reachability_tree, header_mapping,
                          archive_source_file_mapping=None):
  """Computes every translated file reachable from the Java entry classes.

  Args:
    entry_classes: A comma separated list of Java entry classes.
    reachability_tree: A dict mapping translated files to their direct
        dependencies.
    header_mapping: A dict mapping Java class names to translated source files.
    archive_source_file_mapping: Optional dict mapping each archive file to
        the list of source files it contains. When given, source files that
        share a base name inside one archive are treated as entry files.
  Returns:
    A set of reachable translated files from the given list of entry classes.
  Raises:
    Exception: If there is an entry class that is not being transpiled in this
        j2objc_library.
  """
  roots = []
  for java_class in entry_classes.split(','):
    if java_class not in header_mapping:
      raise Exception(java_class +
                      ' is not in the transitive Java deps of included ' +
                      'j2objc_library rules.')
    roots.append(header_mapping[java_class])

  # Sources with duplicated base names inside one archive are always kept.
  # Archive members are named after the base name of their source file, so
  # foo/bar.m and bar/bar.m both become bar.o; archive tools (ar, ranlib, ...)
  # do not handle that well, so dead code removal is not attempted for them.
  if archive_source_file_mapping:
    roots.extend(_DuplicatedFiles(archive_source_file_mapping))

  # Files translated from package-info.java are needed to resolve prefixed
  # ObjC class names and may carry dependencies of their own.
  roots.extend(name for name in reachability_tree
               if name.endswith('package-info'))

  # Transitive closure over the dependency graph. Files missing from the
  # tree (e.g. J2ObjC protos, which are not analyzed for dead code stripping)
  # are still recorded as reachable; they just contribute no dependencies.
  reachable_files = set()
  worklist = list(roots)
  while worklist:
    candidate = worklist.pop()
    if candidate in reachable_files:
      continue
    reachable_files.add(candidate)
    if candidate in reachability_tree:
      worklist.extend(reachability_tree[candidate])
  return reachable_files
153
154
def PruneFiles(input_files, output_files, objc_file_path, reachable_files,
               file_open=open, file_shutil=shutil):
  """Copies over translated files and remove the contents of unreachable files.

  The (input, output) pairs are placed on a queue that is drained by one
  worker thread per CPU, each running _PruneFile.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    reachable_files: A set of reachable source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  Returns:
    None.
  """
  file_queue = Queue.Queue()
  for input_file, output_file in zip(input_files.split(','),
                                     output_files.split(',')):
    file_queue.put((input_file, output_file))

  workers = []
  for _ in range(multiprocessing.cpu_count()):
    t = threading.Thread(target=_PruneFile, args=(file_queue,
                                                  reachable_files,
                                                  objc_file_path,
                                                  file_open,
                                                  file_shutil))
    t.start()
    workers.append(t)

  # Wait for the worker threads themselves rather than for the queue's
  # unfinished-task counter: a thread always terminates (queue drained or an
  # error raised), whereas Queue.join() blocks forever if any dequeued item is
  # never marked done.
  for t in workers:
    t.join()
189
190
def _PruneFile(file_queue, reachable_files, objc_file_path, file_open=open,
               file_shutil=shutil):
  """Drains (input, output) file pairs from a queue, pruning each one.

  For every pair taken from the queue, the input path (without extension) is
  made relative to objc_file_path. If that name is reachable the input file is
  copied to the output path; otherwise the output file is written with
  PRUNED_SRC_CONTENT only. The function returns once the queue is empty.

  Args:
    file_queue: A queue of (input_file, output_file) tuples.
    reachable_files: A set of reachable source files.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  """
  while True:
    try:
      input_file, output_file = file_queue.get_nowait()
    except Queue.Empty:
      return
    try:
      file_name = os.path.relpath(os.path.splitext(input_file)[0],
                                  objc_file_path)
      if file_name in reachable_files:
        file_shutil.copy(input_file, output_file)
      else:
        with file_open(output_file, 'w') as f:
          # Use a static variable scoped to the source file to suppress
          # the "has no symbols" linker warning for empty object files.
          f.write(PRUNED_SRC_CONTENT)
    finally:
      # Always mark the item as done, even when the copy/write fails, so a
      # caller blocked in file_queue.join() is not left waiting forever.
      file_queue.task_done()
208
209
def _DuplicatedFiles(archive_source_file_mapping):
  """Finds source files whose base name is repeated within a single archive.

  Base names are compared including their extension, and only among the
  source files of the same archive; the same base name appearing in two
  different archives is not a duplicate.

  Args:
    archive_source_file_mapping: A dict mapping each archive file to the list
        of source files it contains.
  Returns:
    A list containing files with duplicated base names, each given as its
    source path with the extension removed.
  """
  duplicates = []
  for archive_sources in archive_source_file_mapping.values():
    # Group this archive's sources (minus extension) by their base name.
    by_basename = dict()
    for path in archive_sources:
      stem = os.path.splitext(path)[0]
      by_basename.setdefault(os.path.basename(path), []).append(stem)
    for stems in by_basename.values():
      if len(stems) > 1:
        duplicates.extend(stems)
  return duplicates
236
237
def BuildArchiveSourceFileMapping(archive_source_mapping_files,
                                  file_open=open):
  """Builds a mapping between archive files and their associated source files.

  Every non-blank line of a mapping file is expected to look like
  ``archive:source``. Only the first two colon-separated fields of a line are
  used; surrounding whitespace is stripped and blank lines are skipped.

  Args:
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing. Defaults to the builtin, like the other
        mapping readers in this file.
  Returns:
    A dict mapping each archive file to the list of its associated source
    files.
  Raises:
    IndexError: If a non-blank line does not contain a ':' separator.
  """
  tree = dict()
  for archive_source_mapping_file in archive_source_mapping_files.split(','):
    with file_open(archive_source_mapping_file, 'r') as f:
      for line in f:
        stripped = line.strip()
        if not stripped:
          # Tolerate blank lines (e.g. a trailing empty line) instead of
          # failing with an IndexError while indexing the split result.
          continue
        fields = stripped.split(':')
        tree.setdefault(fields[0], []).append(fields[1])
  return tree
260
261
def PruneSourceFiles(input_files, output_files, dependency_mapping_files,
                     header_mapping_files, entry_classes, objc_file_path,
                     file_open=open, file_shutil=shutil):
  """Prunes translated source files that the entry classes cannot reach.

  Reads the dependency and header mapping files, computes the set of files
  reachable from the entry classes, and hands that set to PruneFiles, which
  copies reachable files and replaces the content of unreachable ones.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    entry_classes: A comma separated list of Java entry classes.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  """
  # Arguments are evaluated left to right, so the dependency mapping files
  # are read before the header mapping files.
  reachable = BuildReachableFileSet(
      entry_classes,
      BuildReachabilityTree(dependency_mapping_files, file_open),
      BuildHeaderMapping(header_mapping_files, file_open))
  PruneFiles(input_files, output_files, objc_file_path, reachable,
             file_open=file_open, file_shutil=file_shutil)
297
298
def MatchObjectNamesInArchive(xcrunwrapper, archive, object_names):
  """Returns object names matching their identity in an archive file.

  The linker that blaze uses appends an md5 hash to object file
  names prior to inclusion in the archive file. Thus, object names
  such as 'foo.o' need to be matched to their appropriate name in
  the archive file, such as 'foo_<hash>.o'.

  Args:
    xcrunwrapper: A wrapper script over xcrun.
    archive: The location of the archive file.
    object_names: The expected basenames of object files to match,
        sans extension. For example 'foo' (not 'foo.o').
  Returns:
    A list of basenames of matching members of the given archive. Empty when
    object_names is empty.
  Raises:
    OSError: If the wrapper cannot be executed.
    subprocess.CalledProcessError: If listing the archive exits non-zero.
  """
  if not object_names:
    # With no names the alternation below would be empty and the pattern
    # would match bare '.o' / '_<hash>.o' members; nothing can match, so skip
    # listing the archive altogether.
    return []

  ar_contents_cmd = [xcrunwrapper, 'ar', '-t', archive]
  real_object_names = subprocess.check_output(ar_contents_cmd)
  if not isinstance(real_object_names, str):
    # check_output yields bytes on Python 3; the pattern below is a str.
    real_object_names = real_object_names.decode('utf-8')
  expected_object_name_regex = r'^(?:%s)(?:_[0-9a-f]{32}(?:-[0-9]+)?)?\.o$' % (
      '|'.join([re.escape(name) for name in object_names]))
  return re.findall(expected_object_name_regex, real_object_names,
                    flags=re.MULTILINE)
321
322
def PruneArchiveFile(input_archive, output_archive, dummy_archive,
                     dependency_mapping_files, header_mapping_files,
                     archive_source_mapping_files, entry_classes, xcrunwrapper,
                     file_open=open):
  """Remove unreachable objects from archive file.

  Depending on how many of the archive's source files are unreachable, the
  output archive is a copy of the dummy archive (all unreachable), a copy of
  the input archive with the unreachable members deleted (some unreachable),
  or a plain copy of the input archive (none unreachable, or the archive is
  not listed in the archive source mapping).

  Args:
    input_archive: The source archive file to prune.
    output_archive: The location of the pruned archive file.
    dummy_archive: A dummy archive file that contains no object.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    entry_classes: A comma separated list of Java entry classes.
    xcrunwrapper: A wrapper script over xcrun.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Raises:
    Exception: If the shell command producing the output archive cannot be
        started or exits with a non-zero status.
  """
  reachability_file_mapping = BuildReachabilityTree(
      dependency_mapping_files, file_open)
  header_map = BuildHeaderMapping(header_mapping_files, file_open)
  archive_source_file_mapping = BuildArchiveSourceFileMapping(
      archive_source_mapping_files, file_open)
  reachable_files_set = BuildReachableFileSet(entry_classes,
                                              reachability_file_mapping,
                                              header_map,
                                              archive_source_file_mapping)

  # Copy the current processes' environment, as xcrunwrapper depends on these
  # variables.
  cmd_env = dict(os.environ)

  # Default action: the archive cannot be pruned by J2ObjC dead code removal
  # (it is not in the mapping) or has no unreachable objects, so just copy
  # over the original archive.
  j2objc_cmd = 'cp %s %s' % (pipes.quote(input_archive),
                             pipes.quote(output_archive))

  if input_archive in archive_source_file_mapping:
    source_files = archive_source_file_mapping[input_archive]
    unreachable_object_names = []
    for source_file in source_files:
      source_without_ext = os.path.splitext(source_file)[0]
      if source_without_ext not in reachable_files_set:
        unreachable_object_names.append(os.path.basename(source_without_ext))

    if unreachable_object_names:
      if len(unreachable_object_names) == len(source_files):
        # All objects in the archive are unreachable: copy over a dummy
        # archive that contains no object.
        j2objc_cmd = 'cp %s %s' % (pipes.quote(dummy_archive),
                                   pipes.quote(output_archive))
      else:
        # Only some objects are unreachable: prune them from a copy.
        # NOTE(review): ZERO_AR_DATE is presumably read by the archive tools
        # to zero out timestamps -- confirm; see the utime call below.
        cmd_env['ZERO_AR_DATE'] = '1'
        # Translate the expected names into the names actually used for the
        # members of the input archive.
        unreachable_object_names = MatchObjectNamesInArchive(
            xcrunwrapper, input_archive, unreachable_object_names)
        j2objc_cmd = ' && '.join([
            # Copy the input archive to the output location
            'cp %s %s' % (pipes.quote(input_archive),
                          pipes.quote(output_archive)),
            # Make the output archive editable
            'chmod +w %s' % (pipes.quote(output_archive)),
            # Remove the unreachable objects from the archive
            '%s ar -d -s %s %s' % (
                pipes.quote(xcrunwrapper),
                pipes.quote(output_archive),
                ' '.join(pipes.quote(uon)
                         for uon in unreachable_object_names)),
            # Update the table of content of the archive file
            '%s ranlib %s' % (pipes.quote(xcrunwrapper),
                              pipes.quote(output_archive)),
        ])

  try:
    subprocess.check_output(
        j2objc_cmd, stderr=subprocess.STDOUT, shell=True, env=cmd_env)
  except OSError as e:
    raise Exception(
        'executing command failed: %s (%s)' % (j2objc_cmd, e.strerror))
  except subprocess.CalledProcessError as e:
    # stderr is merged into the captured output, so without this the tool's
    # diagnostics would be lost when the command exits non-zero.
    cmd_output = e.output
    if cmd_output is not None and not isinstance(cmd_output, str):
      cmd_output = cmd_output.decode('utf-8', 'replace')
    raise Exception(
        'executing command failed: %s (exit status %d)\n%s' % (
            j2objc_cmd, e.returncode, cmd_output))

  # "Touch" the output file.
  # Prevents a pre-Xcode-8 bug in which passing zero-date archive files to ld
  # would cause ld to error.
  os.utime(output_archive, None)
Rumou Duan123e1c32016-02-01 16:16:15 +0000413
414
if __name__ == '__main__':
  # Command-line entry point. Arguments may also be supplied through a file
  # referenced as @path (fromfile_prefix_chars).
  parser = argparse.ArgumentParser(fromfile_prefix_chars='@')

  # TODO(rduan): Remove these three flags once J2ObjC compile actions are fully
  # moved to the edges.
  parser.add_argument(
      '--input_files',
      help=('The comma-separated file paths of translated source files to '
            'prune.'))
  parser.add_argument(
      '--output_files',
      help='The comma-separated file paths of pruned source files to write to.')
  parser.add_argument(
      '--objc_file_path',
      help='The file path which represents a directory where the generated ObjC'
      ' files reside')

  parser.add_argument(
      '--input_archive',
      help=('The path of the translated archive to prune.'))
  parser.add_argument(
      '--output_archive',
      help='The path of the pruned archive file to write to.')
  parser.add_argument(
      '--dummy_archive',
      help='The dummy archive file that contains no symbol.')
  parser.add_argument(
      '--dependency_mapping_files',
      help='The comma-separated file paths of dependency mapping files.')
  parser.add_argument(
      '--header_mapping_files',
      help='The comma-separated file paths of header mapping files.')
  parser.add_argument(
      '--archive_source_mapping_files',
      help='The comma-separated file paths of archive to source mapping files. '
      'These mapping files should contain mappings between the '
      'translated source files and the archive file compiled from those '
      'source files.')
  parser.add_argument(
      '--entry_classes',
      help=('The comma-separated list of Java entry classes to be used as entry'
            ' point of the dead code analysis.'))
  parser.add_argument(
      '--xcrunwrapper',
      help=('The xcrun wrapper script.'))

  args = parser.parse_args()

  if not args.entry_classes:
    # The message fragments are joined by implicit string concatenation so
    # the exception carries one readable message rather than a tuple.
    raise Exception('J2ObjC dead code removal is on but no entry class is '
                    'specified in any j2objc_library targets in the transitive'
                    ' closure')
  if args.input_archive and args.output_archive:
    # Archive mode: prune unreachable objects out of a static library.
    PruneArchiveFile(
        args.input_archive,
        args.output_archive,
        args.dummy_archive,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.archive_source_mapping_files,
        args.entry_classes,
        args.xcrunwrapper)
  else:
    # TODO(rduan): Remove once J2ObjC compile actions are fully moved to the
    # edges.
    PruneSourceFiles(
        args.input_files,
        args.output_files,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.entry_classes,
        args.objc_file_path)