#!/usr/bin/python3

# Copyright 2015 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
"""A script for J2ObjC dead code removal in Blaze.

This script removes unused J2ObjC-translated classes from compilation and
linking by:
  1. Building a class dependency tree among translated source files.
  2. Using user-provided Java class entry points to get a list of reachable
     classes.
  3. Going through all translated source files and rewriting unreachable ones
     with dummy content.
"""
27
28import argparse
29import collections
30import multiprocessing
31import os
32import queue
33import re
34import shlex
35import shutil
36import subprocess
37import threading
38
39
# Placeholder body written in place of every unreachable translated source
# file. It declares a file-scoped static variable so the resulting object file
# is not empty, which suppresses the "has no symbols" linker warning.
PRUNED_SRC_CONTENT = 'static int DUMMY_unused __attribute__((unused,used)) = 0;'
41
42
def BuildReachabilityTree(dependency_mapping_files, file_open=open):
  """Parses dependency mapping files into a source-level reachability tree.

  Args:
    dependency_mapping_files: Comma-separated paths of the dependency mapping
        files generated by J2ObjC.
    file_open: Callable used to open each mapping file. Defaults to the
        builtin open; exists so tests can substitute their own.
  Returns:
    A dict keyed by J2ObjC-generated source file whose values are lists of
    the source files that file directly depends on.
  """
  reachability_tree = BuildArtifactSourceTree(
      dependency_mapping_files, file_open=file_open)
  return reachability_tree
56
57
def BuildHeaderMapping(header_mapping_files, file_open=open):
  """Builds a mapping between Java classes and J2ObjC-translated header files.

  Each non-blank line of a header mapping file has the form
  `<java class name>=<translated header path>`. The file extension of the
  header path is dropped so the value identifies the translated source file
  regardless of extension.

  Args:
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    An ordered dict mapping Java class names to the corresponding
    J2ObjC-translated file paths without extension. When a class name occurs
    more than once, the last occurrence wins.
  Raises:
    IndexError: If a non-blank line contains no '=' separator.
  """
  header_mapping = collections.OrderedDict()
  for header_mapping_file in header_mapping_files.split(','):
    with file_open(header_mapping_file, 'r') as f:
      for line in f:
        line = line.strip()
        if not line:
          # Blank (e.g. trailing) lines carry no mapping; indexing the split
          # result for them would raise IndexError.
          continue
        fields = line.split('=')
        java_class_name = fields[0]
        transpiled_file_name = os.path.splitext(fields[1])[0]
        header_mapping[java_class_name] = transpiled_file_name
  return header_mapping
78
79
def BuildReachableFileSet(entry_classes, reachability_tree, header_mapping,
                          archive_source_file_mapping=None):
  """Computes every translated file reachable from the entry Java classes.

  Args:
    entry_classes: A comma separated list of Java entry classes.
    reachability_tree: A dict mapping translated files to their direct
        dependencies.
    header_mapping: A dict mapping Java class names to translated source files.
    archive_source_file_mapping: Optional dict whose values are the lists of
        source files that go into each archive file.
  Returns:
    A set holding the entry files plus everything transitively reachable from
    them through reachability_tree.
  Raises:
    Exception: If there is an entry class that is not being transpiled in this
        j2objc_library.
  """
  roots = []
  for entry_class in entry_classes.split(','):
    if entry_class not in header_mapping:
      raise Exception(
          entry_class +
          ' is not in the transitive Java deps of included ' +
          'j2objc_library rules.')
    roots.append(header_mapping[entry_class])

  # Source files that share a base name inside one archive (e.g. foo/bar.m and
  # bar/bar.m) both end up as an archive member named bar.o, and archive tools
  # (ar, ranlib, etc.) cannot tell such members apart. Dead code removal
  # cannot be done correctly for them, so they are always kept by treating
  # them as additional entry files.
  if archive_source_file_mapping:
    roots.extend(_DuplicatedFiles(archive_source_file_mapping))

  # Files translated from package-info.java are needed to resolve ObjC class
  # names with prefixes and may have dependencies of their own, so they are
  # entry files as well.
  roots.extend(
      name for name in reachability_tree if name.endswith('package-info'))

  reachable_files = set()
  pending = list(roots)
  while pending:
    current = pending.pop()
    if current in reachable_files:
      continue
    reachable_files.add(current)
    # Files missing from the tree (J2ObjC protos are not analyzed for dead
    # code stripping) are reachable themselves but contribute no dependencies.
    pending.extend(reachability_tree.get(current, ()))
  return reachable_files
145
146
def PruneFiles(input_files, output_files, objc_file_path, reachable_files,
               file_open=open, file_shutil=shutil):
  """Copies over translated files and remove the contents of unreachable files.

  The (input, output) pairs are placed on a shared queue that is drained by
  one worker thread per CPU reported by multiprocessing.cpu_count().

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    reachable_files: A set of reachable source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  Returns:
    None.
  Raises:
    Exception: Re-raises the first error hit by any worker while copying or
        rewriting a file.
  """
  file_queue = queue.Queue()
  for file_pair in zip(input_files.split(','), output_files.split(',')):
    file_queue.put(file_pair)

  worker_errors = []

  def _RunWorker():
    # Capture failures instead of letting the thread die silently, so they
    # can be surfaced to the caller once all workers have stopped.
    try:
      _PruneFile(file_queue, reachable_files, objc_file_path, file_open,
                 file_shutil)
    except Exception as e:  # pylint: disable=broad-except
      worker_errors.append(e)

  workers = [threading.Thread(target=_RunWorker)
             for _ in range(multiprocessing.cpu_count())]
  for worker in workers:
    worker.start()
  # Wait on the threads rather than on the queue: a worker that fails before
  # calling task_done() would make file_queue.join() block forever.
  for worker in workers:
    worker.join()

  if worker_errors:
    raise worker_errors[0]
182
183
def _PruneFile(file_queue, reachable_files, objc_file_path, file_open=open,
               file_shutil=shutil):
  """Drains file_queue, copying reachable files and stubbing out the rest.

  Args:
    file_queue: A queue of (input_file, output_file) tuples to process.
    reachable_files: A set of reachable source files, expressed as
        extension-less paths relative to objc_file_path.
    objc_file_path: The directory the input file paths are made relative to
        before being looked up in reachable_files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  Returns:
    None, once the queue is empty.
  """
  while True:
    try:
      input_file, output_file = file_queue.get_nowait()
    except queue.Empty:
      return
    try:
      file_name = os.path.relpath(os.path.splitext(input_file)[0],
                                  objc_file_path)
      if file_name in reachable_files:
        file_shutil.copy(input_file, output_file)
      else:
        with file_open(output_file, 'w') as f:
          # Use a static variable scoped to the source file to suppress
          # the "has no symbols" linker warning for empty object files.
          f.write(PRUNED_SRC_CONTENT)
    finally:
      # Always balance the get above, even when the copy/write fails;
      # otherwise a join() on the queue would never return.
      file_queue.task_done()
201
202
def _DuplicatedFiles(archive_source_file_mapping):
  """Finds source files whose base name collides with another in one archive.

  Args:
    archive_source_file_mapping: A dict whose values are the lists of source
        files that go into each archive file.
  Returns:
    A list of extension-less source file paths. A path is included when at
    least one other source file of the same archive has the same base name
    (extension included).
  """
  duplicated_files = []
  for archive_sources in archive_source_file_mapping.values():
    # Collisions only matter within a single archive, so group per archive.
    stems_by_basename = {}
    for path in archive_sources:
      stem = os.path.splitext(path)[0]
      stems_by_basename.setdefault(os.path.basename(path), []).append(stem)
    for stems in stems_by_basename.values():
      if len(stems) > 1:
        duplicated_files.extend(stems)
  return duplicated_files
229
230
def BuildArchiveSourceFileMapping(archive_source_mapping_files,
                                  file_open=open):
  """Builds a mapping between archive files and their associated source files.

  Args:
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing. Defaults to the builtin open, matching the
        other mapping builders in this file.
  Returns:
    A dict mapping each archive file to the list of source files associated
    with it.
  """
  return BuildArtifactSourceTree(archive_source_mapping_files, file_open)
243
244
def PruneSourceFiles(input_files, output_files, dependency_mapping_files,
                     header_mapping_files, entry_classes, objc_file_path,
                     file_open=open, file_shutil=shutil):
  """Prunes translated source files that the entry classes cannot reach.

  Reachable input files are copied to their paired output path; unreachable
  ones are replaced by dummy content.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    entry_classes: A comma separated list of Java entry classes.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  """
  reachable = BuildReachableFileSet(
      entry_classes,
      BuildReachabilityTree(dependency_mapping_files, file_open),
      BuildHeaderMapping(header_mapping_files, file_open))
  PruneFiles(input_files, output_files, objc_file_path, reachable, file_open,
             file_shutil)
280
281
def MatchObjectNamesInArchive(xcrunwrapper, archive, object_names):
  """Resolves expected object names to the member names stored in an archive.

  The linker that blaze uses appends an md5 hash to object file names prior
  to inclusion in the archive file, so an object such as 'foo.o' may be
  stored as 'foo_<hash>.o'. The archive listing is searched for members named
  '<name>.o' or '<name>_<32 hex digits>.o', the latter optionally followed by
  '-<digits>' before the extension.

  Args:
    xcrunwrapper: A wrapper script over xcrun.
    archive: The location of the archive file.
    object_names: The expected basenames of object files to match,
        sans extension. For example 'foo' (not 'foo.o').
  Returns:
    A list of basenames of matching members of the given archive
  """
  # List the archive members first, one name per line of output.
  listing = subprocess.check_output([xcrunwrapper, 'ar', '-t', archive])
  alternatives = '|'.join(re.escape(name) for name in object_names)
  member_pattern = re.compile(
      r'^(?:%s)(?:_[0-9a-f]{32}(?:-[0-9]+)?)?\.o$' % alternatives,
      flags=re.MULTILINE)
  return member_pattern.findall(listing.decode('utf-8'))
307
308
def PruneArchiveFile(input_archive, output_archive, dummy_archive,
                     dependency_mapping_files, header_mapping_files,
                     archive_source_mapping_files, entry_classes, xcrunwrapper,
                     file_open=open):
  """Writes a copy of an archive with its unreachable objects removed.

  Depending on how many of the archive's source files are reachable, the
  output is a plain copy of the input archive, a copy of the dummy archive,
  or a copy of the input archive edited with `ar -d` and re-indexed with
  `ranlib`.

  Args:
    input_archive: The source archive file to prune.
    output_archive: The location of the pruned archive file.
    dummy_archive: A dummy archive file that contains no object.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    entry_classes: A comma separated list of Java entry classes.
    xcrunwrapper: A wrapper script over xcrun.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Raises:
    Exception: If the shell command could not be executed (OSError).
    subprocess.CalledProcessError: If the shell command exits with a non-zero
        status.
  """
  dependency_tree = BuildReachabilityTree(dependency_mapping_files, file_open)
  class_to_source = BuildHeaderMapping(header_mapping_files, file_open)
  archive_sources = BuildArchiveSourceFileMapping(
      archive_source_mapping_files, file_open)
  reachable = BuildReachableFileSet(
      entry_classes, dependency_tree, class_to_source, archive_sources)

  # xcrunwrapper depends on the variables of the current process, so the
  # command environment starts out as a copy of it.
  cmd_env = dict(os.environ)
  quoted_output = shlex.quote(output_archive)

  # An archive without a mapping entry cannot be pruned by J2ObjC dead code
  # removal; it is handled like an archive with no unreachable objects.
  members = archive_sources.get(input_archive, [])
  stems = [os.path.splitext(member)[0] for member in members]
  unreachable_object_names = [
      os.path.basename(stem) for stem in stems if stem not in reachable]

  if not unreachable_object_names:
    # Nothing to prune: copy over the original archive.
    j2objc_cmd = 'cp %s %s' % (shlex.quote(input_archive), quoted_output)
  elif len(unreachable_object_names) == len(members):
    # Every object is unreachable: copy over the dummy archive that contains
    # no object.
    j2objc_cmd = 'cp %s %s' % (shlex.quote(dummy_archive), quoted_output)
  else:
    cmd_env['ZERO_AR_DATE'] = '1'
    # Copy the input archive to the output location.
    copy_step = 'cp %s %s && ' % (shlex.quote(input_archive), quoted_output)
    # Make the output archive editable.
    chmod_step = 'chmod +w %s && ' % quoted_output
    # Translate the expected object names into the names actually stored in
    # the archive, then remove those members.
    archive_member_names = MatchObjectNamesInArchive(
        xcrunwrapper, input_archive, unreachable_object_names)
    quoted_wrapper = shlex.quote(xcrunwrapper)
    delete_step = '%s ar -d -s %s %s && ' % (
        quoted_wrapper,
        quoted_output,
        ' '.join(shlex.quote(name) for name in archive_member_names))
    # Update the table of content of the archive file.
    ranlib_step = '%s ranlib %s' % (quoted_wrapper, quoted_output)
    j2objc_cmd = copy_step + chmod_step + delete_step + ranlib_step

  try:
    subprocess.check_output(
        j2objc_cmd, stderr=subprocess.STDOUT, shell=True, env=cmd_env)
  except OSError as e:
    raise Exception(
        'executing command failed: %s (%s)' % (j2objc_cmd, e.strerror))

  # "Touch" the output file.
  # Prevents a pre-Xcode-8 bug in which passing zero-date archive files to ld
  # would cause ld to error.
  os.utime(output_archive, None)
399
400
def BuildArtifactSourceTree(files, file_open=open):
  """Builds a dependency tree from dependency mapping files.

  Each non-blank line of a mapping file has the form `<entry>:<dependency>`.
  Lines that share the same entry accumulate their dependencies in file
  order.

  Args:
    files: A comma separated list of dependency mapping files. May be empty
        or None, in which case an empty tree is returned.
    file_open: Reference to the builtin open function so it may be overridden
        for testing.

  Returns:
    A dict mapping build artifacts (possibly generated source files) to the
    corresponding direct dependent source files.

  Raises:
    IndexError: If a non-blank line contains no ':' separator.
  """
  tree = dict()
  if not files:
    return tree
  for filename in files.split(','):
    with file_open(filename, 'r') as f:
      for line in f:
        line = line.strip()
        if not line:
          # Blank (e.g. trailing) lines carry no mapping; indexing the split
          # result for them would raise IndexError.
          continue
        fields = line.split(':')
        entry, dep = fields[0], fields[1]
        tree.setdefault(entry, []).append(dep)
  return tree
426
427
# Script entry point: parses the command line flags and prunes either a single
# archive (when both --input_archive and --output_archive are given) or a list
# of translated source files.
if __name__ == '__main__':
  parser = argparse.ArgumentParser(fromfile_prefix_chars='@')

  # TODO(rduan): Remove these three flags once J2ObjC compile actions are fully
  # moved to the edges.
  parser.add_argument(
      '--input_files',
      help=('The comma-separated file paths of translated source files to '
            'prune.'))
  parser.add_argument(
      '--output_files',
      help='The comma-separated file paths of pruned source files to write to.')
  parser.add_argument(
      '--objc_file_path',
      help='The file path which represents a directory where the generated ObjC'
      ' files reside')

  parser.add_argument(
      '--input_archive',
      help=('The path of the translated archive to prune.'))
  parser.add_argument(
      '--output_archive',
      help='The path of the pruned archive file to write to.')
  parser.add_argument(
      '--dummy_archive',
      help='The dummy archive file that contains no symbol.')
  parser.add_argument(
      '--dependency_mapping_files',
      help='The comma-separated file paths of dependency mapping files.')
  parser.add_argument(
      '--header_mapping_files',
      help='The comma-separated file paths of header mapping files.')
  parser.add_argument(
      '--archive_source_mapping_files',
      help='The comma-separated file paths of archive to source mapping files. '
      'These mapping files should contain mappings between the '
      'translated source files and the archive file compiled from those '
      'source files.')
  parser.add_argument(
      '--entry_classes',
      help=('The comma-separated list of Java entry classes to be used as entry'
            ' point of the dead code analysis.'))
  parser.add_argument(
      '--xcrunwrapper',
      help=('The xcrun wrapper script.'))

  args = parser.parse_args()

  if not args.entry_classes:
    # The message parts are joined by implicit string concatenation so the
    # exception carries one readable message rather than a tuple of fragments.
    raise Exception('J2objC dead code removal is on but no entry class is '
                    'specified in any j2objc_library targets in the transitive'
                    ' closure')
  if args.input_archive and args.output_archive:
    PruneArchiveFile(
        args.input_archive,
        args.output_archive,
        args.dummy_archive,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.archive_source_mapping_files,
        args.entry_classes,
        args.xcrunwrapper)
  else:
    # TODO(rduan): Remove once J2ObjC compile actions are fully moved to the
    # edges.
    PruneSourceFiles(
        args.input_files,
        args.output_files,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.entry_classes,
        args.objc_file_path)