blob: 626c83c7058e550b9c25d28e16c28ae0029cb7d7 [file] [log] [blame]
Googler041f7ce2022-03-23 21:32:08 -07001#!/usr/bin/python3
2
3# Copyright 2015 The Bazel Authors. All rights reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
"""A script for J2ObjC dead code removal in Blaze.

This script removes unused J2ObjC-translated classes from compilation and
linking by:
  1. Building a class dependency tree among translated source files.
  2. Using user-provided Java class entry points to get a list of reachable
     classes.
  3. Going through all translated source files and rewriting unreachable ones
     with dummy content.
"""
27
28import argparse
29import collections
30import multiprocessing
31import os
32import queue
33import re
34import shlex
35import shutil
36import subprocess
37import threading
38
39
# Content written in place of every pruned (unreachable) translated source
# file: a static variable scoped to that file, which keeps the resulting
# object file from being empty and so suppresses the linker's
# "has no symbols" warning.
PRUNED_SRC_CONTENT = 'static int DUMMY_unused __attribute__((unused,used)) = 0;'
41
42
def BuildReachabilityTree(dependency_mapping_files, file_open=open):
  """Parses dependency mapping files into a reachability tree.

  This is a thin, intention-revealing wrapper: the parsing itself is delegated
  to BuildArtifactSourceTree.

  Args:
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    file_open: Callable used to open each mapping file. Defaults to the
        builtin open; may be overridden for testing.

  Returns:
    A dict mapping each J2ObjC-generated source file to the list of source
    files it directly depends on.
  """
  reachability_tree = BuildArtifactSourceTree(
      files=dependency_mapping_files, file_open=file_open)
  return reachability_tree
56
57
def BuildHeaderMapping(header_mapping_files, file_open=open):
  """Builds a mapping between Java classes and J2ObjC-translated header files.

  Each non-blank line of a header mapping file has the form
  `<java class name>=<translated header path>`. The extension of the header
  path is dropped so that the value can be compared against extension-less
  translated file names.

  Args:
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.

  Returns:
    An ordered dict mapping Java class names to the corresponding
    J2ObjC-translated file paths, without file extension. If a class appears
    more than once, the last occurrence wins.

  Raises:
    IndexError: If a non-blank line does not contain '='.
  """
  header_mapping = collections.OrderedDict()
  for header_mapping_file in header_mapping_files.split(','):
    with file_open(header_mapping_file, 'r') as f:
      for line in f:
        entry = line.strip()
        if not entry:
          # Blank lines (typically a trailing newline at the end of the file)
          # carry no mapping; skip them instead of failing on the split below.
          continue
        fields = entry.split('=')
        java_class_name = fields[0]
        transpiled_file_name = os.path.splitext(fields[1])[0]
        header_mapping[java_class_name] = transpiled_file_name
  return header_mapping
78
79
def BuildReachableFileSet(entry_classes, reachability_tree, header_mapping,
                          archive_source_file_mapping=None):
  """Computes every translated file reachable from the Java entry classes.

  Args:
    entry_classes: A comma separated list of Java entry classes.
    reachability_tree: A dict mapping translated files to their direct
        dependencies.
    header_mapping: A dict mapping Java class names to translated source files.
    archive_source_file_mapping: Optional dict whose values are the lists of
        source files that go into one archive file. When given, files that
        share a base name within one archive are treated as entry files.

  Returns:
    A set containing the entry files and everything transitively reachable
    from them through reachability_tree.

  Raises:
    Exception: If there is an entry class that is not being transpiled in this
        j2objc_library.
  """
  roots = []
  for java_class in entry_classes.split(','):
    if java_class not in header_mapping:
      raise Exception(
          java_class +
          ' is not in the transitive Java deps of included ' +
          'j2objc_library rules.')
    roots.append(header_mapping[java_class])

  # Files with duplicated base names inside the same static library archive
  # are kept unconditionally. Object entries in an archive are named after the
  # base name of their source file, so foo/bar.m and bar/bar.m both become
  # bar.o; archive tools (ar, ranlib, etc.) do not handle that well, so dead
  # code removal cannot be done correctly for them.
  if archive_source_file_mapping:
    roots.extend(_DuplicatedFiles(archive_source_file_mapping))

  # Translations of package-info.java are always roots: they are needed to
  # resolve prefixed ObjC class names and may have dependencies of their own.
  roots.extend(
      candidate for candidate in reachability_tree
      if candidate.endswith('package-info'))

  reachable_files = set()
  pending = list(roots)
  while pending:
    current = pending.pop()
    if current in reachable_files:
      continue
    reachable_files.add(current)
    # Not every file has an entry in the tree: J2ObjC protos are not analyzed
    # for dead code stripping and therefore never appear as keys.
    if current in reachability_tree:
      pending.extend(reachability_tree[current])
  return reachable_files
145
146
def PruneFiles(input_files, output_files, objc_file_path, reachable_files,
               file_open=open, file_shutil=shutil):
  """Copies over translated files and remove the contents of unreachable files.

  The (input, output) pairs are placed on a queue that is drained by one
  worker thread per CPU. The call returns once every pair has been processed.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    reachable_files: A set of reachable source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.

  Returns:
    None.

  Raises:
    Exception: The first error hit by any worker while copying or writing a
        file is re-raised here once all files have been attempted.
  """
  file_queue = queue.Queue()
  for input_file, output_file in zip(
      input_files.split(','),
      output_files.split(',')):
    file_queue.put((input_file, output_file))

  # Shared between workers; list.append is atomic in CPython, and the list is
  # only read after file_queue.join() below.
  errors = []
  for _ in range(multiprocessing.cpu_count()):
    t = threading.Thread(target=_PruneFile, args=(file_queue,
                                                  reachable_files,
                                                  objc_file_path,
                                                  file_open,
                                                  file_shutil,
                                                  errors))
    t.start()

  file_queue.join()
  if errors:
    raise errors[0]


def _PruneFile(file_queue, reachable_files, objc_file_path, file_open=open,
               file_shutil=shutil, errors=None):
  """Worker loop: processes (input, output) pairs until the queue is empty.

  A reachable file is copied verbatim to its output location; an unreachable
  one is replaced by PRUNED_SRC_CONTENT.

  Args:
    file_queue: A queue of (input_file, output_file) tuples.
    reachable_files: A set of reachable source files, given as paths relative
        to objc_file_path and without file extension.
    objc_file_path: The directory that input file paths are made relative to
        before the reachability lookup.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
    errors: Optional list collecting exceptions raised while processing a
        file. When None, such an exception propagates out of this function.
  """
  while True:
    try:
      input_file, output_file = file_queue.get_nowait()
    except queue.Empty:
      return
    try:
      file_name = os.path.relpath(os.path.splitext(input_file)[0],
                                  objc_file_path)
      if file_name in reachable_files:
        file_shutil.copy(input_file, output_file)
      else:
        with file_open(output_file, 'w') as f:
          # Use a static variable scoped to the source file to suppress
          # the "has no symbols" linker warning for empty object files.
          f.write(PRUNED_SRC_CONTENT)
    except Exception as e:  # Reported to the caller via `errors`.
      if errors is None:
        raise
      errors.append(e)
    finally:
      # Always acknowledge the item: a missing task_done() would make the
      # file_queue.join() in PruneFiles block forever.
      file_queue.task_done()
201
202
203def _DuplicatedFiles(archive_source_file_mapping):
204 """Returns a list of file with duplicated base names in each archive file.
205
206 Args:
207 archive_source_file_mapping: A dict mapping source files to the associated
208 archive file that contains them.
209 Returns:
210 A list containing files with duplicated base names.
211 """
212 duplicated_files = []
213 dict_with_duplicates = dict()
214
215 for source_files in archive_source_file_mapping.values():
216 for source_file in source_files:
217 file_basename = os.path.basename(source_file)
218 file_without_ext = os.path.splitext(source_file)[0]
219 if file_basename in dict_with_duplicates:
220 dict_with_duplicates[file_basename].append(file_without_ext)
221 else:
222 dict_with_duplicates[file_basename] = [file_without_ext]
223 for basename in dict_with_duplicates:
224 if len(dict_with_duplicates[basename]) > 1:
225 duplicated_files.extend(dict_with_duplicates[basename])
226 dict_with_duplicates = dict()
227
228 return duplicated_files
229
230
def BuildArchiveSourceFileMapping(archive_source_mapping_files, file_open=open):
  """Builds a mapping between archive files and their associated source files.

  The parsing is delegated to BuildArtifactSourceTree.

  Args:
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing. Defaults to the builtin open, like the other
        mapping builders in this file.

  Returns:
    A dict mapping between archive files and their associated source files.
  """
  return BuildArtifactSourceTree(archive_source_mapping_files, file_open)
243
244
def PruneSourceFiles(input_files, output_files, dependency_mapping_files,
                     header_mapping_files, entry_classes, objc_file_path,
                     file_open=open, file_shutil=shutil):
  """Prunes translated source files that are unreachable from entry classes.

  Reachable inputs are copied to their output location unchanged; unreachable
  ones have their contents replaced.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    entry_classes: A comma separated list of Java entry classes.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  """
  dependency_tree = BuildReachabilityTree(dependency_mapping_files, file_open)
  class_to_file = BuildHeaderMapping(header_mapping_files, file_open)
  live_files = BuildReachableFileSet(
      entry_classes, dependency_tree, class_to_file)
  PruneFiles(
      input_files, output_files, objc_file_path, live_files, file_open,
      file_shutil)
280
281
def MatchObjectNamesInArchive(archive, object_names):
  """Returns object names matching their identity in an archive file.

  The linker that blaze uses appends an md5 hash to object file
  names prior to inclusion in the archive file. Thus, object names
  such as 'foo.o' need to be matched to their appropriate name in
  the archive file, such as 'foo_<hash>.o'.

  Args:
    archive: The location of the archive file.
    object_names: The expected basenames of object files to match,
        sans extension. For example 'foo' (not 'foo.o').

  Returns:
    A list of basenames of matching members of the given archive. Empty if
    object_names is empty.

  Raises:
    subprocess.CalledProcessError: If listing the archive with `ar -t` fails.
  """
  if not object_names:
    # With no names the alternation below would be empty and the pattern would
    # match members literally named '.o' or '_<hash>.o'. Nothing was asked
    # for, so nothing matches; no need to list the archive either.
    return []

  ar_contents_cmd = ['/usr/bin/xcrun', 'ar', '-t', archive]
  real_object_names_output = subprocess.check_output(ar_contents_cmd)
  real_object_names = real_object_names_output.decode('utf-8')
  # One archive member per line: '<name>.o', or '<name>_<32 hex digits>.o'
  # optionally followed by '-<number>' before the extension.
  expected_object_name_regex = r'^(?:%s)(?:_[0-9a-f]{32}(?:-[0-9]+)?)?\.o$' % (
      '|'.join(re.escape(name) for name in object_names))
  return re.findall(
      expected_object_name_regex,
      real_object_names,
      flags=re.MULTILINE)
306
307
def PruneArchiveFile(
    input_archive,
    output_archive,
    dummy_archive,
    dependency_mapping_files,
    header_mapping_files,
    archive_source_mapping_files,
    entry_classes,
    file_open=open,
):
  """Remove unreachable objects from archive file.

  Depending on how many objects of the archive are unreachable, the output is
  a plain copy of the input archive (none, or the archive is unknown to the
  mapping), a copy of the dummy archive (all), or a copy of the input archive
  with the unreachable members deleted and its table of contents rebuilt.

  Args:
    input_archive: The source archive file to prune.
    output_archive: The location of the pruned archive file.
    dummy_archive: A dummy archive file that contains no object.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    entry_classes: A comma separated list of Java entry classes.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.

  Raises:
    subprocess.CalledProcessError: If the shell command that produces the
        output archive exits with a non-zero status. The command's captured
        output is written to stderr before the error propagates.
    Exception: If the command could not be executed at all (OSError).
  """
  reachability_file_mapping = BuildReachabilityTree(
      dependency_mapping_files, file_open)
  header_map = BuildHeaderMapping(header_mapping_files, file_open)
  archive_source_file_mapping = BuildArchiveSourceFileMapping(
      archive_source_mapping_files, file_open)
  reachable_files_set = BuildReachableFileSet(entry_classes,
                                              reachability_file_mapping,
                                              header_map,
                                              archive_source_file_mapping)

  # Copy the current process's environment.
  cmd_env = dict(os.environ)
  j2objc_cmd = ''
  if input_archive in archive_source_file_mapping:
    source_files = archive_source_file_mapping[input_archive]
    unreachable_object_names = []

    for source_file in source_files:
      if os.path.splitext(source_file)[0] not in reachable_files_set:
        unreachable_object_names.append(
            os.path.basename(os.path.splitext(source_file)[0]))

    # There are unreachable objects in the archive to prune
    if unreachable_object_names:
      # If all objects in the archive are unreachable, just copy over a dummy
      # archive that contains no object
      if len(unreachable_object_names) == len(source_files):
        j2objc_cmd = 'cp %s %s' % (shlex.quote(dummy_archive),
                                   shlex.quote(output_archive))
      # Else we need to prune the archive of unreachable objects
      else:
        cmd_env['ZERO_AR_DATE'] = '1'
        # Copy the input archive to the output location
        j2objc_cmd += 'cp %s %s && ' % (shlex.quote(input_archive),
                                        shlex.quote(output_archive))
        # Make the output archive editable
        j2objc_cmd += 'chmod +w %s && ' % (shlex.quote(output_archive))
        # Remove the unreachable objects from the archive
        unreachable_object_names = MatchObjectNamesInArchive(
            input_archive, unreachable_object_names
        )
        j2objc_cmd += '/usr/bin/xcrun ar -d -s %s %s && ' % (
            shlex.quote(output_archive),
            ' '.join(shlex.quote(uon) for uon in unreachable_object_names),
        )
        # Update the table of content of the archive file
        j2objc_cmd += '/usr/bin/xcrun ranlib %s' % shlex.quote(output_archive)
    # There are no unreachable objects, we just copy over the original archive
    else:
      j2objc_cmd = 'cp %s %s' % (shlex.quote(input_archive),
                                 shlex.quote(output_archive))
  # The archive cannot be pruned by J2ObjC dead code removal, just copy over
  # the original archive
  else:
    j2objc_cmd = 'cp %s %s' % (shlex.quote(input_archive),
                               shlex.quote(output_archive))

  try:
    subprocess.check_output(
        j2objc_cmd, stderr=subprocess.STDOUT, shell=True, env=cmd_env)
  except subprocess.CalledProcessError as e:
    # A failing command surfaces as CalledProcessError (not OSError). Its
    # message only carries the exit status, so surface the captured
    # stdout/stderr before letting the original exception propagate.
    import sys  # Local import: this file does not import sys at module level.
    sys.stderr.write('executing command failed: %s\n' % j2objc_cmd)
    if e.output:
      sys.stderr.write(e.output.decode('utf-8', 'replace'))
    raise
  except OSError as e:
    raise Exception(
        'executing command failed: %s (%s)' % (j2objc_cmd, e.strerror))

  # "Touch" the output file.
  # Prevents a pre-Xcode-8 bug in which passing zero-date archive files to ld
  # would cause ld to error.
  os.utime(output_archive, None)
402
403
def BuildArtifactSourceTree(files, file_open=open):
  """Builds a dependency tree from dependency mapping files.

  Each non-blank line of a mapping file is either `<entry>:<dependency>` or
  just `<entry>`, the latter meaning the dependency has the same name as the
  entry. Lines for the same entry accumulate, in file order.

  Args:
    files: A comma separated list of dependency mapping files. May be empty
        or None, in which case an empty dict is returned.
    file_open: Reference to the builtin open function so it may be overridden
        for testing.

  Returns:
    A dict mapping build artifacts (possibly generated source files) to the
    corresponding direct dependent source files.
  """
  tree = dict()
  if not files:
    return tree
  for filename in files.split(','):
    with file_open(filename, 'r') as f:
      for line in f:
        record = line.strip()
        if not record:
          # A blank line carries no mapping; without this check it would be
          # recorded as a bogus '' -> [''] entry.
          continue
        fields = record.split(':')
        entry = fields[0]
        # The build system allows for adding just the entry if the dependency
        # is the same name.
        dep = fields[1] if len(fields) > 1 else entry
        tree.setdefault(entry, []).append(dep)
  return tree
435
436
# Command line entry point. Prunes an archive when both --input_archive and
# --output_archive are given; otherwise prunes individual source files.
if __name__ == '__main__':
  parser = argparse.ArgumentParser(fromfile_prefix_chars='@')

  # TODO(rduan): Remove these three flags once J2ObjC compile actions are fully
  # moved to the edges.
  parser.add_argument(
      '--input_files',
      help=('The comma-separated file paths of translated source files to '
            'prune.'))
  parser.add_argument(
      '--output_files',
      help='The comma-separated file paths of pruned source files to write to.')
  parser.add_argument(
      '--objc_file_path',
      help='The file path which represents a directory where the generated ObjC'
      ' files reside')

  parser.add_argument(
      '--input_archive',
      help=('The path of the translated archive to prune.'))
  parser.add_argument(
      '--output_archive',
      help='The path of the pruned archive file to write to.')
  parser.add_argument(
      '--dummy_archive',
      help='The dummy archive file that contains no symbol.')
  parser.add_argument(
      '--dependency_mapping_files',
      help='The comma-separated file paths of dependency mapping files.')
  parser.add_argument(
      '--header_mapping_files',
      help='The comma-separated file paths of header mapping files.')
  parser.add_argument(
      '--archive_source_mapping_files',
      help='The comma-separated file paths of archive to source mapping files. '
      'These mapping files should contain mappings between the '
      'translated source files and the archive file compiled from those '
      'source files.')
  parser.add_argument(
      '--entry_classes',
      help=('The comma-separated list of Java entry classes to be used as entry'
            ' point of the dead code analysis.'))

  args = parser.parse_args()

  if not args.entry_classes:
    # The literals are joined by implicit concatenation (no commas) so the
    # exception carries a single message string rather than a tuple of
    # fragments.
    raise Exception('J2objC dead code removal is on but no entry class is '
                    'specified in any j2objc_library targets in the transitive'
                    ' closure')
  if args.input_archive and args.output_archive:
    PruneArchiveFile(
        args.input_archive,
        args.output_archive,
        args.dummy_archive,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.archive_source_mapping_files,
        args.entry_classes,
    )
  else:
    # TODO(rduan): Remove once J2ObjC compile actions are fully moved to the
    # edges.
    PruneSourceFiles(
        args.input_files,
        args.output_files,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.entry_classes,
        args.objc_file_path)