#!/usr/bin/python2.7

# Copyright 2015 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
17"""A script for J2ObjC dead code removal in Blaze.
18
This script removes unused J2ObjC-translated classes from compilation and
linking by:
  1. Building a class dependency tree among translated source files.
  2. Using user-provided Java class entry points to get a list of reachable
     classes.
  3. Going through all translated source files and rewriting unreachable ones
     with dummy content.
26"""
27
28import argparse
29from collections import OrderedDict
30import multiprocessing
31import os
32import Queue
Chris Parsons87846c72016-05-05 20:24:13 +000033import re
Michael Thvedt828a4be2015-08-12 17:45:36 +000034import shutil
Rumou Duan123e1c32016-02-01 16:16:15 +000035import subprocess
Michael Thvedt828a4be2015-08-12 17:45:36 +000036import threading
37
# Replacement content written to every pruned (unreachable) source file. The
# file-scoped static variable keeps the resulting object file from being empty.
PRUNED_SRC_CONTENT = 'static int DUMMY_unused __attribute__((unused,used)) = 0;'
39
40
def BuildReachabilityTree(dependency_mapping_files, file_open=open):
  """Builds a reachability tree using entries from dependency mapping files.

  Every non-blank line of a mapping file is expected to look like
  `<entry>:<dep>`; the first two colon-separated fields are used.

  Args:
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files. May be None or empty, in which case the
        resulting tree is empty.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    A dict mapping J2ObjC-generated source files to the corresponding direct
    dependent source files.
  Raises:
    IndexError: If a non-blank line contains no ':' separator.
  """
  tree = dict()
  # ''.split(',') yields [''], which would be opened as a file name, and None
  # has no split() at all, so bail out early when there is nothing to read.
  if not dependency_mapping_files:
    return tree
  for dependency_mapping_file in dependency_mapping_files.split(','):
    with file_open(dependency_mapping_file, 'r') as f:
      for line in f:
        fields = line.strip().split(':')
        # Tolerate blank lines (e.g. a trailing newline at the end of a file).
        if fields == ['']:
          continue
        tree.setdefault(fields[0], []).append(fields[1])
  return tree
64
65
def BuildHeaderMapping(header_mapping_files, file_open=open):
  """Builds a mapping between Java classes and J2ObjC-translated header files.

  Every non-blank line of a mapping file is expected to look like
  `<java class>=<header path>`; the header path is stored without its file
  extension.

  Args:
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files. May be None or empty, in which case the
        resulting mapping is empty.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    An ordered dict mapping Java class names to corresponding J2ObjC-translated
    source files.
  Raises:
    IndexError: If a non-blank line contains no '=' separator.
  """
  header_mapping = OrderedDict()
  # ''.split(',') yields [''], which would be opened as a file name, and None
  # has no split() at all, so bail out early when there is nothing to read.
  if not header_mapping_files:
    return header_mapping
  for header_mapping_file in header_mapping_files.split(','):
    with file_open(header_mapping_file, 'r') as f:
      for line in f:
        fields = line.strip().split('=')
        # Tolerate blank lines (e.g. a trailing newline at the end of a file).
        if fields == ['']:
          continue
        java_class_name = fields[0]
        transpiled_file_name = os.path.splitext(fields[1])[0]
        header_mapping[java_class_name] = transpiled_file_name
  return header_mapping
86
87
def BuildReachableFileSet(entry_classes, reachability_tree, header_mapping,
                          archive_source_file_mapping=None):
  """Builds a set of reachable translated files from entry Java classes.

  Args:
    entry_classes: A comma separated list of Java entry classes.
    reachability_tree: A dict mapping translated files to their direct
        dependencies.
    header_mapping: A dict mapping Java class names to translated source files.
    archive_source_file_mapping: A dict mapping archive files to the list of
        source files they contain. Optional.
  Returns:
    A set of reachable translated files from the given list of entry classes.
  Raises:
    Exception: If there is an entry class that is not being transpiled in this
        j2objc_library.
  """
  transpiled_entry_files = []
  for entry_class in entry_classes.split(','):
    if entry_class not in header_mapping:
      raise Exception(entry_class +
                      ' is not in the transitive Java deps of included ' +
                      'j2objc_library rules.')
    transpiled_entry_files.append(header_mapping[entry_class])

  # Translated files going into the same static library archive with duplicated
  # base names are always treated as entry files, i.e. they are never pruned.
  #
  # Dead code removal cannot be performed correctly for them: the object file
  # entries in static library archives are named by the base names of the
  # original source files. If two source files (e.g., foo/bar.m, bar/bar.m) go
  # into the same archive and share the same base name (bar.m), their object
  # file entries inside the archive will have the same name (bar.o), which
  # current archive tools (ar, ranlib, etc.) do not handle very well.
  if archive_source_file_mapping:
    transpiled_entry_files.extend(_DuplicatedFiles(archive_source_file_mapping))

  # Translated files from package-info.java are also added to the entry files
  # because they are needed to resolve ObjC class names with prefixes and these
  # files may also have dependencies.
  transpiled_entry_files.extend(
      transpiled_file for transpiled_file in reachability_tree
      if transpiled_file.endswith('package-info'))

  # Worklist traversal: every file is expanded at most once.
  reachable_files = set()
  pending_files = list(transpiled_entry_files)
  while pending_files:
    transpiled_file = pending_files.pop()
    if transpiled_file in reachable_files:
      continue
    reachable_files.add(transpiled_file)
    # A file may be missing from the reachability tree: J2ObjC protos are not
    # analyzed for dead code stripping and therefore never appear in it.
    pending_files.extend(reachability_tree.get(transpiled_file, ()))
  return reachable_files
152
153
def PruneFiles(input_files, output_files, objc_file_path, reachable_files,
               file_open=open, file_shutil=shutil):
  """Copies reachable translated files and blanks out unreachable ones.

  The (input, output) pairs are placed on a queue that is drained by one
  worker thread per CPU; this function returns once every pair has been
  marked as processed.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    reachable_files: A set of reachable source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  Returns:
    None.
  """
  pending_pairs = Queue.Queue()
  input_paths = input_files.split(',')
  output_paths = output_files.split(',')
  for path_pair in zip(input_paths, output_paths):
    pending_pairs.put(path_pair)

  # Every worker receives the same arguments and pulls from the shared queue.
  worker_args = (pending_pairs, reachable_files, objc_file_path, file_open,
                 file_shutil)
  for _ in xrange(multiprocessing.cpu_count()):
    worker = threading.Thread(target=_PruneFile, args=worker_args)
    worker.start()

  # Blocks until task_done() has been called for every queued pair.
  pending_pairs.join()
188
189
def _PruneFile(file_queue, reachable_files, objc_file_path, file_open=open,
               file_shutil=shutil):
  """Worker loop that prunes queued files until the queue is empty.

  For each (input_file, output_file) pair taken from the queue, the input is
  copied to the output when it is reachable; otherwise the output is written
  with PRUNED_SRC_CONTENT.

  Args:
    file_queue: A queue of (input_file, output_file) path pairs.
    reachable_files: A set of reachable source files, given as paths relative
        to objc_file_path and without file extension.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  """
  while True:
    try:
      input_file, output_file = file_queue.get_nowait()
    except Queue.Empty:
      return
    try:
      file_name = os.path.relpath(os.path.splitext(input_file)[0],
                                  objc_file_path)
      if file_name in reachable_files:
        file_shutil.copy(input_file, output_file)
      else:
        # The `with` block closes the output file even if the write fails.
        with file_open(output_file, 'w') as f:
          # Use a static variable scoped to the source file to suppress
          # the "has no symbols" linker warning for empty object files.
          f.write(PRUNED_SRC_CONTENT)
    finally:
      # Always mark the item as done: if a failure skipped this call, the
      # item would stay outstanding and file_queue.join() would never return.
      file_queue.task_done()
208
209
def _DuplicatedFiles(archive_source_file_mapping):
  """Returns a list of files with duplicated base names in each archive file.

  Base names are compared within a single archive only; two files with the
  same base name in different archives are not reported.

  Args:
    archive_source_file_mapping: A dict mapping archive files to the list of
        source files they contain.
  Returns:
    A list containing files with duplicated base names, given as source file
    paths without file extension.
  """
  duplicated_files = []
  for source_files in archive_source_file_mapping.values():
    # Group this archive's files by base name, keeping first-seen order so the
    # result is deterministic.
    files_by_basename = OrderedDict()
    for source_file in source_files:
      file_basename = os.path.basename(source_file)
      file_without_ext = os.path.splitext(source_file)[0]
      files_by_basename.setdefault(file_basename, []).append(file_without_ext)
    for files_sharing_basename in files_by_basename.values():
      if len(files_sharing_basename) > 1:
        duplicated_files.extend(files_sharing_basename)
  return duplicated_files
236
237
def BuildArchiveSourceFileMapping(archive_source_mapping_files,
                                  file_open=open):
  """Builds a mapping between archive files and their associated source files.

  Every non-blank line of a mapping file is expected to look like
  `<archive>:<source file>`; the first two colon-separated fields are used.

  Args:
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files. May
        be None or empty, in which case the resulting mapping is empty.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    A dict mapping between archive files and their associated source files.
  Raises:
    IndexError: If a non-blank line contains no ':' separator.
  """
  tree = dict()
  # ''.split(',') yields [''], which would be opened as a file name, and None
  # has no split() at all, so bail out early when there is nothing to read.
  if not archive_source_mapping_files:
    return tree
  for archive_source_mapping_file in archive_source_mapping_files.split(','):
    with file_open(archive_source_mapping_file, 'r') as f:
      for line in f:
        fields = line.strip().split(':')
        # Tolerate blank lines (e.g. a trailing newline at the end of a file).
        if fields == ['']:
          continue
        tree.setdefault(fields[0], []).append(fields[1])
  return tree
260
261
def PruneSourceFiles(input_files, output_files, dependency_mapping_files,
                     header_mapping_files, entry_classes, objc_file_path,
                     file_open=open, file_shutil=shutil):
  """Prunes translated source files that are unreachable from entry classes.

  Reads the dependency and header mapping files, computes the set of files
  reachable from the entry classes, then hands the input/output file lists to
  PruneFiles together with that set.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    entry_classes: A comma separated list of Java entry classes.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  """
  dependency_tree = BuildReachabilityTree(dependency_mapping_files, file_open)
  class_to_file = BuildHeaderMapping(header_mapping_files, file_open)
  reachable = BuildReachableFileSet(
      entry_classes, dependency_tree, class_to_file)
  PruneFiles(
      input_files, output_files, objc_file_path, reachable, file_open,
      file_shutil)
297
298
def MatchObjectNamesInArchive(xcrunwrapper, archive, object_names):
  """Returns object names matching their identity in an archive file.

  The linker that blaze uses appends an md5 hash to object file
  names prior to inclusion in the archive file. Thus, object names
  such as 'foo.o' need to be matched to their appropriate name in
  the archive file, such as 'foo_<hash>.o'.

  Args:
    xcrunwrapper: A wrapper script over xcrun.
    archive: The location of the archive file.
    object_names: The expected basenames of object files to match,
        sans extension. For example 'foo' (not 'foo.o').
  Returns:
    A list of basenames of matching members of the given archive
  Raises:
    subprocess.CalledProcessError: If listing the archive members fails.
  """
  # An empty alternation would match members named just '_<hash>.o'; with no
  # names to look for there is nothing to match and no need to run `ar`.
  if not object_names:
    return []
  # Pass the arguments as a list so that archive paths containing spaces or
  # shell metacharacters are handed to `ar` verbatim. universal_newlines makes
  # the output text (rather than bytes) on every Python version.
  ar_contents_cmd = [xcrunwrapper, 'ar', '-t', archive]
  real_object_names = subprocess.check_output(ar_contents_cmd,
                                              universal_newlines=True)
  # The pattern must cover a whole line of the listing: the '.' of the
  # extension is escaped and '$' anchors the end, so 'foo_<hash>xo' and
  # 'foo_<hash>.obj' are not mistaken for 'foo_<hash>.o'.
  expected_object_name_regex = r'^(?:%s)_[0-9a-f]{32}\.o$' % (
      '|'.join([re.escape(name) for name in object_names]))
  return re.findall(expected_object_name_regex, real_object_names,
                    flags=re.MULTILINE)
321
322
def PruneArchiveFile(input_archive, output_archive, dummy_archive,
                     dependency_mapping_files, header_mapping_files,
                     archive_source_mapping_files, entry_classes, xcrunwrapper,
                     file_open=open):
  """Remove unreachable objects from archive file.

  Depending on how many objects of the input archive are unreachable, the
  output archive is a plain copy of the input (none unreachable, or the archive
  is not listed in the archive source mapping), a copy of the dummy archive
  (all unreachable), or a copy of the input with the unreachable members
  deleted via `ar -d` followed by `ranlib`.

  Args:
    input_archive: The source archive file to prune.
    output_archive: The location of the pruned archive file.
    dummy_archive: A dummy archive file that contains no object.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    entry_classes: A comma separated list of Java entry classes.
    xcrunwrapper: A wrapper script over xcrun.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Raises:
    subprocess.CalledProcessError: If the pruning shell command fails.
  """
  # shlex.quote exists on Python 3 only; pipes.quote is the Python 2.7
  # spelling of the same function.
  try:
    from shlex import quote as shell_quote
  except ImportError:
    from pipes import quote as shell_quote

  reachability_file_mapping = BuildReachabilityTree(
      dependency_mapping_files, file_open)
  header_map = BuildHeaderMapping(header_mapping_files, file_open)
  archive_source_file_mapping = BuildArchiveSourceFileMapping(
      archive_source_mapping_files, file_open)
  reachable_files_set = BuildReachableFileSet(entry_classes,
                                              reachability_file_mapping,
                                              header_map,
                                              archive_source_file_mapping)

  # Everything interpolated into the shell command below is quoted, because
  # paths and object names may contain spaces or special shell characters.
  quoted_input_archive = shell_quote(input_archive)
  quoted_output_archive = shell_quote(output_archive)

  cmd_env = {}
  # Default: copy over the original archive. This covers both an archive that
  # cannot be pruned by J2ObjC dead code removal (it is not in the mapping) and
  # an archive without any unreachable object.
  j2objc_cmd = 'cp %s %s' % (quoted_input_archive, quoted_output_archive)

  if input_archive in archive_source_file_mapping:
    source_files = archive_source_file_mapping[input_archive]
    unreachable_object_names = []

    for source_file in source_files:
      source_file_without_ext = os.path.splitext(source_file)[0]
      if source_file_without_ext not in reachable_files_set:
        unreachable_object_names.append(
            os.path.basename(source_file_without_ext))

    # There are unreachable objects in the archive to prune
    if unreachable_object_names:
      # If all objects in the archive are unreachable, just copy over a dummy
      # archive that contains no object
      if len(unreachable_object_names) == len(source_files):
        j2objc_cmd = 'cp %s %s' % (shell_quote(dummy_archive),
                                   quoted_output_archive)
      # Else we need to prune the archive of unreachable objects
      else:
        cmd_env['ZERO_AR_DATE'] = '1'
        quoted_xcrunwrapper = shell_quote(xcrunwrapper)
        # Translate the expected object names into the names the members
        # actually carry inside the archive.
        unreachable_object_names = MatchObjectNamesInArchive(
            xcrunwrapper, input_archive, unreachable_object_names)
        quoted_unreachable_object_names = [
            shell_quote(unreachable_object_name)
            for unreachable_object_name in unreachable_object_names]
        j2objc_cmd = ' && '.join([
            # Copy the input archive to the output location
            'cp %s %s' % (quoted_input_archive, quoted_output_archive),
            # Make the output archive editable
            'chmod +w %s' % quoted_output_archive,
            # Remove the unreachable objects from the archive
            '%s ar -d -s %s %s' % (
                quoted_xcrunwrapper,
                quoted_output_archive,
                ' '.join(quoted_unreachable_object_names)),
            # Update the table of content of the archive file
            '%s ranlib %s' % (quoted_xcrunwrapper, quoted_output_archive),
        ])

  subprocess.check_output(
      j2objc_cmd, stderr=subprocess.STDOUT, shell=True, env=cmd_env)

  # "Touch" the output file.
  # Prevents a pre-Xcode-8 bug in which passing zero-date archive files to ld
  # would cause ld to error.
  os.utime(output_archive, None)
Rumou Duan123e1c32016-02-01 16:16:15 +0000407
408
if __name__ == '__main__':
  # Command-line entry point: prunes either a single archive (when both
  # --input_archive and --output_archive are given) or a list of translated
  # source files.
  parser = argparse.ArgumentParser(fromfile_prefix_chars='@')

  # TODO(rduan): Remove these three flags once J2ObjC compile actions are fully
  # moved to the edges.
  parser.add_argument(
      '--input_files',
      help=('The comma-separated file paths of translated source files to '
            'prune.'))
  parser.add_argument(
      '--output_files',
      help='The comma-separated file paths of pruned source files to write to.')
  parser.add_argument(
      '--objc_file_path',
      help='The file path which represents a directory where the generated ObjC'
      ' files reside')

  parser.add_argument(
      '--input_archive',
      help=('The path of the translated archive to prune.'))
  parser.add_argument(
      '--output_archive',
      help='The path of the pruned archive file to write to.')
  parser.add_argument(
      '--dummy_archive',
      help='The dummy archive file that contains no symbol.')
  parser.add_argument(
      '--dependency_mapping_files',
      help='The comma-separated file paths of dependency mapping files.')
  parser.add_argument(
      '--header_mapping_files',
      help='The comma-separated file paths of header mapping files.')
  parser.add_argument(
      '--archive_source_mapping_files',
      help='The comma-separated file paths of archive to source mapping files. '
           'These mapping files should contain mappings between the '
           'translated source files and the archive file compiled from those '
           'source files.')
  parser.add_argument(
      '--entry_classes',
      help=('The comma-separated list of Java entry classes to be used as entry'
            ' point of the dead code analysis.'))
  parser.add_argument(
      '--xcrunwrapper',
      help=('The xcrun wrapper script.'))

  args = parser.parse_args()

  if not args.entry_classes:
    # The message parts are joined by implicit string concatenation; separating
    # them with commas would turn the message into a tuple of three arguments.
    raise Exception('J2objC dead code removal is on but no entry class is '
                    'specified in any j2objc_library targets in the transitive'
                    ' closure')
  if args.input_archive and args.output_archive:
    PruneArchiveFile(
        args.input_archive,
        args.output_archive,
        args.dummy_archive,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.archive_source_mapping_files,
        args.entry_classes,
        args.xcrunwrapper)
  else:
    # TODO(rduan): Remove once J2ObjC compile actions are fully moved to the
    # edges.
    PruneSourceFiles(
        args.input_files,
        args.output_files,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.entry_classes,
        args.objc_file_path)