emscripten/tools/building.py at autodebug · JavaScriptBench/emscripten

History

1647 lines (1411 loc) · 63.4 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

# Emscripten is available under two separate licenses, the MIT license and the

# University of Illinois/NCSA Open Source License. Both these licenses can be

# found in the LICENSE file.

import atexit

import json

import logging

import multiprocessing

import os

import re

import shlex

import shutil

import subprocess

import sys

import tempfile

from subprocess import STDOUT, PIPE

from . import diagnostics

from . import response_file

from . import shared

from . import webassembly

from . import config

from .toolchain_profiler import ToolchainProfiler

from .shared import Settings, CLANG_CC, CLANG_CXX, PYTHON

from .shared import LLVM_NM, EMCC, EMAR, EMXX, EMRANLIB, WASM_LD, LLVM_AR

from .shared import LLVM_LINK, LLVM_OBJCOPY

from .shared import try_delete, run_process, check_call, exit_with_error

from .shared import configuration, path_from_root, EXPECTED_BINARYEN_VERSION

from .shared import asmjs_mangle, DEBUG

from .shared import EM_BUILD_VERBOSE, TEMP_DIR, print_compiler_stage

from .shared import CANONICAL_TEMP_DIR, LLVM_DWARFDUMP, demangle_c_symbol_name, asbytes

from .shared import get_emscripten_temp_dir, exe_suffix, is_c_symbol

from .utils import which, WINDOWS

# Logger used by the helpers in this module.
logger = logging.getLogger('building')

# Building

# Lazily created pool shared by all pooled tasks; populated by
# get_multiprocessing_pool().
multiprocessing_pool = None
# NOTE(review): not referenced in the visible part of this file; presumably
# remembers that the binaryen version was already checked - confirm at its use.
binaryen_checked = False

# internal caches
# llvm_nm() results obtained with include_internal=True, keyed by filename.
internal_nm_cache = {}
# cache results of nm - it can be slow to run
# (llvm_nm() / parallel_llvm_nm() results for include_internal=False).
uninternal_nm_cache = {}
# Stores the object files contained in different archive files passed as input
# (archive path -> list of extracted member paths, see read_link_inputs()).
ar_contents = {}
# NOTE(review): only cleared in the visible code (see clear()); presumably
# memoizes is_ar() answers - confirm against is_ar().
_is_ar_cache = {}

# the exports the user requested
user_requested_exports = []

class ObjectFileInfo(object):
    """Result of running llvm-nm on a single object file.

    Attributes:
      returncode: exit code of the llvm-nm invocation (0 on success).
      output: textual output kept for diagnostics (None when parsing succeeded).
      defs: set of symbols the file defines.
      undefs: set of symbols the file references but does not define.
      commons: set of common symbols.
    """

    def __init__(self, returncode, output, defs=None, undefs=None, commons=None):
        self.returncode = returncode
        self.output = output
        # Create a fresh set per instance when none is given. A `set()` default
        # in the signature would be shared by every instance built without
        # explicit symbol sets, so mutating one would leak into all others.
        self.defs = set() if defs is None else defs
        self.undefs = set() if undefs is None else undefs
        self.commons = set() if commons is None else commons

    def is_valid_for_nm(self):
        """Return True when llvm-nm exited successfully for this file."""
        return self.returncode == 0

# llvm-ar appears to just use basenames inside archives. as a result, files

# with the same basename will trample each other when we extract them. to help

# warn of such situations, we warn if there are duplicate entries in the

# archive

def warn_if_duplicate_entries(archive_contents, archive_filename):
    """Emit an 'emcc' warning if an archive lists a member name more than once.

    Args:
      archive_contents: list of member names as reported for the archive.
      archive_filename: name of the archive, used only in the message.

    Each duplicated name is reported once, in order of first appearance.
    Nothing happens when all names are unique.
    """
    if len(archive_contents) == len(set(archive_contents)):
        return

    # Count every name in one pass instead of re-scanning the tail of the list
    # for each entry (which was quadratic in the number of members).
    occurrences = {}
    for entry in archive_contents:
        occurrences[entry] = occurrences.get(entry, 0) + 1

    msg = '%s: archive file contains duplicate entries. This is not supported by emscripten. Only the last member with a given name will be linked in which can result in undefined symbols. You should either rename your source files, or use `emar` to create you archives which works around this issue.' % archive_filename
    warned = set()
    for entry in archive_contents:
        if occurrences[entry] > 1 and entry not in warned:
            msg += '\n duplicate: %s' % entry
            warned.add(entry)

    diagnostics.warning('emcc', msg)

# This function creates a temporary directory specified by the 'dir' field in

# the returned dictionary. Caller is responsible for cleaning up those files

# after done.

def extract_archive_contents(archive_file):
    """List the members of an archive and extract them into a new temp dir.

    Args:
      archive_file: path of the archive handed to llvm-ar.

    Returns:
      A dict with the keys
        'returncode': exit code of the extracting llvm-ar run (0 for an
                      empty archive, where nothing is extracted),
        'dir': the temporary directory that was created, or None,
        'files': paths of the extracted members inside 'dir'.
      The caller is responsible for deleting 'dir'.
    """
    lines = run_process([LLVM_AR, 't', archive_file], stdout=PIPE).stdout.splitlines()
    # ignore empty lines
    contents = [l for l in lines if l]
    if not contents:
        logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_file)
        return {
            'returncode': 0,
            'dir': None,
            'files': []
        }

    # `ar` files can only contain filenames. Just to be sure, verify that each
    # entry has only a filename component and is not absolute.
    for f in contents:
        assert not os.path.dirname(f)
        assert not os.path.isabs(f)

    warn_if_duplicate_entries(contents, archive_file)

    # create temp dir
    temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')

    # extract file in temp dir
    proc = run_process([LLVM_AR, 'xo', archive_file], stdout=PIPE, stderr=STDOUT, cwd=temp_dir)
    abs_contents = [os.path.join(temp_dir, c) for c in contents]

    # check that all files were created
    missing_contents = [x for x in abs_contents if not os.path.exists(x)]
    if missing_contents:
        # Report the archive itself. The message used to name `f`, the leftover
        # loop variable above, i.e. the last member rather than the archive.
        exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + archive_file + '! Error:' + str(proc.stdout))

    return {
        'returncode': proc.returncode,
        'dir': temp_dir,
        'files': abs_contents
    }

# Due to a python pickling issue, the following two functions must be at top

# level, or multiprocessing pool spawn won't find them.

def g_llvm_nm_uncached(filename):
    """Top-level trampoline to llvm_nm_uncached() for use as a pool task."""
    symbol_info = llvm_nm_uncached(filename)
    return symbol_info

def g_multiprocessing_initializer(*args):
    """Apply 'KEY=VALUE' settings in a freshly started pool child.

    The special key EMCC_POOL_CWD changes the working directory to VALUE;
    every other key is written to os.environ. Only the first '=' separates
    key from value.
    """
    for setting in args:
        name, setting_value = setting.split('=', 1)
        if name != 'EMCC_POOL_CWD':
            os.environ[name] = setting_value
            continue
        os.chdir(setting_value)

def unique_ordered(values):
    """Return the distinct items of `values` as a list, first occurrence first.

    Unlike list(set(values)) the relative order of the input is kept.
    """
    already_seen = set()
    distinct = []
    for item in values:
        if item in already_seen:
            continue
        already_seen.add(item)
        distinct.append(item)
    return distinct

# clear internal caches. this is not normally needed, except if the clang/LLVM

# used changes inside this invocation of Building, which can happen in the benchmarker

# when it compares different builds.

def clear():
    """Empty the module-level nm, archive-contents and is-ar caches in place."""
    for cache in (internal_nm_cache, uninternal_nm_cache, ar_contents, _is_ar_cache):
        cache.clear()

def get_num_cores():
    """Return the core count: EMCC_CORES if set, else the machine's CPU count."""
    detected = multiprocessing.cpu_count()
    requested = os.environ.get('EMCC_CORES', detected)
    return int(requested)

# Multiprocessing pools are very slow to build up and tear down, and having

# several pools throughout the application has a problem of overallocating

# child processes. Therefore maintain a single centralized pool that is shared

# between all pooled task invocations.

def get_multiprocessing_pool():
    """Return the shared task pool, creating it on first use.

    With a single core (or when DEBUG is set, to keep logging readable) the
    "pool" is an in-process stand-in that runs tasks sequentially and offers
    only `map` and `map_async`. Otherwise a real multiprocessing.Pool is
    created and an atexit hook is registered to shut it down.
    """
    global multiprocessing_pool
    if not multiprocessing_pool:
        cores = get_num_cores()
        if DEBUG:
            # When in EMCC_DEBUG mode, only use a single core in the pool, so that
            # logging is not all jumbled up.
            cores = 1

        # If running with one core only, create a mock instance of a pool that does not
        # actually spawn any new subprocesses. Very useful for internal debugging.
        if cores == 1:
            class FakeMultiprocessor(object):
                def map(self, func, tasks, *args, **kwargs):
                    return [func(t) for t in tasks]

                def map_async(self, func, tasks, *args, **kwargs):
                    class Result:
                        def __init__(self, func, tasks):
                            self.func = func
                            self.tasks = tasks

                        # `timeout` is optional, as on the real AsyncResult.get,
                        # and is ignored because the work runs synchronously
                        # here. The stored func/tasks are used instead of
                        # variables captured from the enclosing call.
                        def get(self, timeout=None):
                            return [self.func(t) for t in self.tasks]

                    return Result(func, tasks)

            multiprocessing_pool = FakeMultiprocessor()
        else:
            child_env = [
                # Multiprocessing pool children must have their current working
                # directory set to a safe path that is guaranteed not to die in
                # between of executing commands, or otherwise the pool children will
                # have trouble spawning subprocesses of their own.
                'EMCC_POOL_CWD=' + path_from_root(),
                # Multiprocessing pool children can't spawn their own linear number of
                # children, that could cause a quadratic amount of spawned processes.
                'EMCC_CORES=1'
            ]
            multiprocessing_pool = multiprocessing.Pool(processes=cores, initializer=g_multiprocessing_initializer, initargs=child_env)

            def close_multiprocessing_pool():
                global multiprocessing_pool

                try:
                    # Shut down the pool explicitly, because leaving that for Python to do at process shutdown is buggy and can generate
                    # noisy "WindowsError: [Error 5] Access is denied" spam which is not fatal.
                    multiprocessing_pool.terminate()
                    multiprocessing_pool.join()
                    multiprocessing_pool = None
                except OSError as e:
                    # Mute the "WindowsError: [Error 5] Access is denied" errors, raise all others through
                    if not (sys.platform.startswith('win') and isinstance(e, WindowsError) and e.winerror == 5):
                        raise

            # Only the real pool needs shutting down; the fake one defines no
            # terminate()/join().
            atexit.register(close_multiprocessing_pool)

    return multiprocessing_pool

# .. but for Popen, we cannot have doublequotes, so provide functionality to

# remove them when needed.

def remove_quotes(arg):
    """Strip one layer of matching surrounding quotes from `arg`.

    A list is processed element by element (recursively). For a string that
    both starts and ends with the same quote character (double quotes are
    checked first, then single quotes), the outer characters are dropped and
    backslash-escaped occurrences of that quote are unescaped. Anything else
    is returned unchanged.
    """
    if isinstance(arg, list):
        return [remove_quotes(element) for element in arg]

    for quote in ('"', "'"):
        if arg.startswith(quote) and arg.endswith(quote):
            inner = arg[1:-1]
            return inner.replace('\\' + quote, quote)

    return arg

def get_building_env(cflags=[]):
    """Return a copy of os.environ set up so builds use the em* tools.

    Args:
      cflags: optional list of compiler flags; when non-empty they are joined
              with spaces into both CFLAGS and EMMAKEN_CFLAGS.

    Requires PATH to be present in the current environment.
    """
    env = os.environ.copy()

    # point CC etc. to the em* tools.
    env.update({
        'CC': EMCC,
        'CXX': EMXX,
        'AR': EMAR,
        'LD': EMCC,
        'NM': LLVM_NM,
        'LDSHARED': EMCC,
        'RANLIB': EMRANLIB,
        'EMSCRIPTEN_TOOLS': path_from_root('tools'),
    })

    if cflags:
        joined_cflags = ' '.join(cflags)
        env['CFLAGS'] = joined_cflags
        env['EMMAKEN_CFLAGS'] = joined_cflags

    pkgconfig_dirs = [
        path_from_root('system', 'local', 'lib', 'pkgconfig'),
        path_from_root('system', 'lib', 'pkgconfig'),
    ]
    env.update({
        'HOST_CC': CLANG_CC,
        'HOST_CXX': CLANG_CXX,
        # if set to nothing, CFLAGS is used, which we don't want
        'HOST_CFLAGS': "-W",
        # if set to nothing, CXXFLAGS is used, which we don't want
        'HOST_CXXFLAGS': "-W",
        'PKG_CONFIG_LIBDIR': os.path.pathsep.join(pkgconfig_dirs),
        'PKG_CONFIG_PATH': os.environ.get('EM_PKG_CONFIG_PATH', ''),
        'EMSCRIPTEN': path_from_root(),
        'PATH': path_from_root('system', 'bin') + os.pathsep + env['PATH'],
        # produces /path/to/emscripten/em , which then can have 'cc', 'ar', etc
        # appended to it
        'CROSS_COMPILE': path_from_root('em'),
    })
    return env

# Returns a clone of the given environment with all directories that contain

# sh.exe removed from the PATH. Used to work around CMake limitation with

# MinGW Makefiles, where sh.exe is not allowed to be present.

def remove_sh_exe_from_path(env):
    """Return a copy of `env` whose PATH has no directory containing sh.exe.

    On non-Windows hosts the copy is returned unmodified. The input mapping
    is never changed.
    """
    cleaned = env.copy()
    if WINDOWS:
        kept_dirs = [
            directory for directory in cleaned['PATH'].split(';')
            if not os.path.exists(os.path.join(directory, 'sh.exe'))
        ]
        cleaned['PATH'] = ';'.join(kept_dirs)
    return cleaned

def handle_cmake_toolchain(args, env):
    """Add the Emscripten-specific defaults to a CMake command line.

    Args:
      args: CMake argument list; missing defaults are appended in place.
      env: environment mapping for the CMake run.

    Returns:
      An (args, env) tuple. `env` is a modified copy when sh.exe had to be
      removed from PATH for MinGW Makefiles, otherwise the object passed in.
    """
    def has_substr(args, substr):
        return any(substr in s for s in args)

    # Append the Emscripten toolchain file if the user didn't specify one.
    if not has_substr(args, '-DCMAKE_TOOLCHAIN_FILE'):
        args.append('-DCMAKE_TOOLCHAIN_FILE=' + path_from_root('cmake', 'Modules', 'Platform', 'Emscripten.cmake'))

    if not has_substr(args, '-DCMAKE_CROSSCOMPILING_EMULATOR'):
        # The previous `.replace('"', '\"')` was a no-op ('\"' is just '"')
        # and an earlier `node_js = config.NODE_JS` was never read; both are
        # dropped without changing the resulting argument.
        node_js = config.NODE_JS[0]
        args.append('-DCMAKE_CROSSCOMPILING_EMULATOR="%s"' % node_js)

    # On Windows specify MinGW Makefiles or ninja if we have them and no other
    # toolchain was specified, to keep CMake from pulling in a native Visual
    # Studio, or Unix Makefiles.
    if WINDOWS and '-G' not in args:
        if which('mingw32-make'):
            args += ['-G', 'MinGW Makefiles']
        elif which('ninja'):
            args += ['-G', 'Ninja']

    # CMake has a requirement that it wants sh.exe off PATH if MinGW Makefiles
    # is being used. This happens quite often, so do this automatically on
    # behalf of the user. See
    # http://www.cmake.org/Wiki/CMake_MinGW_Compiler_Issues
    if WINDOWS and 'MinGW Makefiles' in args:
        env = remove_sh_exe_from_path(env)

    return (args, env)

def configure(args, stdout=None, stderr=None, env=None, cflags=[], **kwargs):
    """Run a configure step (CMake or a ./configure-style script).

    Args:
      args: command line; args[0] decides whether this is a CMake run.
      stdout, stderr: passed to run_process unless overridden by
                      EM_BUILD_VERBOSE (>= 2 resets stdout, >= 1 resets stderr).
      env: environment to copy; a falsy value means get_building_env(cflags).
      cflags: only used when the environment is built here.
      **kwargs: forwarded to run_process.
    """
    env = env.copy() if env else get_building_env(cflags=cflags)

    if 'cmake' not in args[0]:
        # When we configure via a ./configure script, don't do config-time
        # compilation with emcc, but instead do builds natively with Clang. This
        # is a heuristic emulation that may or may not work.
        env['EMMAKEN_JUST_CONFIGURE'] = '1'
    else:
        # Note: EMMAKEN_JUST_CONFIGURE shall not be enabled when configuring with
        # CMake. This is because CMake does expect to be able to do
        # config-time builds with emcc.
        args, env = handle_cmake_toolchain(args, env)

    if EM_BUILD_VERBOSE >= 2:
        stdout = None
    if EM_BUILD_VERBOSE >= 1:
        stderr = None

    banner = 'configure: ' + shared.shlex_join(args)
    print(banner, file=sys.stderr)
    run_process(args, stdout=stdout, stderr=stderr, env=env, **kwargs)

def make(args, stdout=None, stderr=None, env=None, cflags=[], **kwargs):
    """Run a make command inside the Emscripten build environment.

    Args:
      args: command line; on Windows args[0] may be rewritten in place to the
            located mingw32-make.
      stdout, stderr: passed to run_process unless overridden by
                      EM_BUILD_VERBOSE (>= 2 resets stdout, >= 1 resets stderr).
      env: environment to use; None means get_building_env(cflags).
      cflags: only used when the environment is built here.
      **kwargs: forwarded to run_process.
    """
    if env is None:
        env = get_building_env(cflags=cflags)

    # On Windows prefer building with mingw32-make instead of make, if it exists.
    if WINDOWS and args[0] == 'make':
        found_mingw_make = which('mingw32-make')
        if found_mingw_make:
            args[0] = found_mingw_make
    if WINDOWS and 'mingw32-make' in args[0]:
        env = remove_sh_exe_from_path(env)

    if EM_BUILD_VERBOSE >= 2:
        stdout = None
    if EM_BUILD_VERBOSE >= 1:
        stderr = None

    banner = 'make: ' + ' '.join(args)
    print(banner, file=sys.stderr)
    # On Windows, run the execution through shell to get PATH expansion and
    # executable extension lookup, e.g. 'sdl2-config' will match with
    # 'sdl2-config.bat' in PATH.
    run_process(args, stdout=stdout, stderr=stderr, env=env, shell=WINDOWS, **kwargs)

def make_paths_absolute(f):
    """Return `f` as an absolute path, leaving '-' prefixed flags untouched."""
    is_flag = f.startswith('-')  # skip flags
    return f if is_flag else os.path.abspath(f)

# Runs llvm-nm in parallel for the given list of files.

# The results are populated in uninternal_nm_cache

# The work is dispatched on the shared pool returned by
# get_multiprocessing_pool(); the function takes no pool argument.

def parallel_llvm_nm(files):
    """Run llvm-nm over `files` on the shared pool and cache every result.

    Each result (successful or not) is stored in uninternal_nm_cache under
    its file name; failures are additionally logged at debug level.

    Returns:
      The list of per-file results, in the order of `files`.
    """
    with ToolchainProfiler.profile_block('parallel_llvm_nm'):
        object_contents = get_multiprocessing_pool().map(g_llvm_nm_uncached, files)

        for path, info in zip(files, object_contents):
            if info.returncode != 0:
                logger.debug('llvm-nm failed on file ' + path + ': return code ' + str(info.returncode) + ', error: ' + info.output)
            uninternal_nm_cache[path] = info
        return object_contents

def read_link_inputs(files):
    """Extract archives and gather symbol tables for all link inputs.

    Archives not yet in ar_contents are extracted on the shared pool and
    recorded there; their temp dirs are scheduled for deletion at exit. Then
    llvm-nm is run on every not-yet-cached bitcode input and archive member,
    populating the nm cache. Nothing is returned.

    Raises:
      Exception: when extracting an archive reports a non-zero return code.
    """
    with ToolchainProfiler.profile_block('read_link_inputs'):
        # Before performing the link, we need to look at each input file to determine which symbols
        # each of them provides. Do this in multiple parallel processes.
        archive_names = []  # .a files passed in to the command line to the link
        object_names = []  # .o/.bc files passed in to the command line to the link
        for link_input in files:
            abs_path = make_paths_absolute(link_input)
            if abs_path not in ar_contents and is_ar(abs_path):
                archive_names.append(abs_path)
            elif abs_path not in uninternal_nm_cache and is_bitcode(abs_path):
                object_names.append(abs_path)

        # Archives contain objects, so process all archives first in parallel to obtain the object files in them.
        extracted = get_multiprocessing_pool().map(extract_archive_contents, archive_names)

        for archive_name, extraction in zip(archive_names, extracted):
            if extraction['returncode'] != 0:
                raise Exception('llvm-ar failed on archive ' + archive_name + '!')
            ar_contents[archive_name] = extraction['files']
            # Empty archives have no temp dir to clean up.
            if extraction['dir']:
                atexit.register(try_delete, extraction['dir'])

        for extraction in extracted:
            object_names.extend(
                member for member in extraction['files']
                if member not in uninternal_nm_cache)

        # Next, extract symbols from all object files (either standalone or inside archives we just extracted)
        # The results are not used here directly, but populated to llvm-nm cache structure.
        parallel_llvm_nm(object_names)

def llvm_backend_args():
    """Return the list of LLVM backend options derived from Settings."""
    # disable slow and relatively unimportant optimization passes
    backend_args = ['-combiner-global-alias-analysis=false']

    # asm.js-style exception handling
    catching_mode = Settings.DISABLE_EXCEPTION_CATCHING
    if catching_mode != 1:
        backend_args.append('-enable-emscripten-cxx-exceptions')
    if catching_mode == 2:
        allowed_list = Settings.EXCEPTION_CATCHING_ALLOWED or ['__fake']
        backend_args.append('-emscripten-cxx-exceptions-allowed=' + ','.join(allowed_list))

    if Settings.SUPPORT_LONGJMP:
        # asm.js-style setjmp/longjmp handling
        backend_args.append('-enable-emscripten-sjlj')

    # better (smaller, sometimes faster) codegen, see binaryen#1054
    # and https://bugs.llvm.org/show_bug.cgi?id=39488
    backend_args.append('-disable-lsr')

    return backend_args

def link_to_object(linker_inputs, target):
    """Link `linker_inputs` into a single object file at `target`.

    With Settings.LTO the bitcode linker in this module is used (see
    https://bugs.llvm.org/show_bug.cgi?id=40654); otherwise lld produces a
    relocatable wasm object.
    """
    if Settings.LTO:
        link_bitcode(linker_inputs, target)
        return
    relocatable_inputs = linker_inputs + ['--relocatable']
    link_lld(relocatable_inputs, target)

def link_llvm(linker_inputs, target):
    """Run llvm-link on `linker_inputs`, writing the result to `target`.

    Long command lines are passed through a response file.
    """
    full_command = [LLVM_LINK] + linker_inputs + ['-o', target]
    command = get_command_with_possible_response_file(full_command)
    print_compiler_stage(command)
    check_call(command)

def lld_flags_for_executable(external_symbol_list):
    """Build the wasm-ld flags used when linking a final executable/module.

    Args:
      external_symbol_list: symbols allowed to stay undefined. When non-empty
        they are written, one per line, to a temp file passed via
        --allow-undefined-file, and removed from the exported C symbols;
        otherwise all undefined symbols are allowed.

    Returns:
      A list of command line flags; their order follows the Settings checks
      below.
    """
    cmd = []
    if external_symbol_list:
        undefs = configuration.get_temp_files().get('.undefined').name
        with open(undefs, 'w') as f:
            f.write('\n'.join(external_symbol_list))
        cmd.append('--allow-undefined-file=%s' % undefs)
    else:
        cmd.append('--allow-undefined')

    # wasi does not import the memory (but for JS it is efficient to do so,
    # as it allows us to set up memory, preload files, etc. even before the
    # wasm module arrives)
    if not Settings.STANDALONE_WASM:
        cmd.append('--import-memory')

    if Settings.USE_PTHREADS:
        cmd.append('--shared-memory')

    # wasm-ld can strip debug info for us. this strips both the Names
    # section and DWARF, so we can only use it when we don't need any of
    # those things.
    if Settings.DEBUG_LEVEL < 2 and (not Settings.EMIT_SYMBOL_MAP and
                                     not Settings.PROFILING_FUNCS and
                                     not Settings.ASYNCIFY):
        cmd.append('--strip-debug')

    if Settings.RELOCATABLE:
        if Settings.MAIN_MODULE == 2 or Settings.SIDE_MODULE == 2:
            cmd.append('--no-export-dynamic')
        else:
            cmd.append('--no-gc-sections')
            cmd.append('--export-dynamic')
    else:
        cmd.append('--export-table')
        if Settings.ALLOW_TABLE_GROWTH:
            cmd.append('--growable-table')

    if Settings.LINKABLE:
        cmd.append('--export-all')
    else:
        c_exports = [e for e in Settings.EXPORTED_FUNCTIONS if is_c_symbol(e)]
        # Strip the leading underscores
        c_exports = [demangle_c_symbol_name(e) for e in c_exports]
        if external_symbol_list:
            # Filter out symbols external/JS symbols
            c_exports = [e for e in c_exports if e not in external_symbol_list]
        for export in c_exports:
            cmd += ['--export', export]

    if Settings.RELOCATABLE:
        cmd.append('--experimental-pic')
        if Settings.SIDE_MODULE:
            cmd.append('-shared')
        else:
            cmd.append('-pie')

    # Stack, memory, entry point and global base are only set for
    # non-side-module links.
    if not Settings.SIDE_MODULE:
        cmd += [
            '-z', 'stack-size=%s' % Settings.TOTAL_STACK,
            '--initial-memory=%d' % Settings.INITIAL_MEMORY,
        ]

        if Settings.STANDALONE_WASM:
            # when Settings.EXPECT_MAIN is set we fall back to wasm-ld default of _start
            if not Settings.EXPECT_MAIN:
                cmd += ['--entry=_initialize']
        else:
            if Settings.EXPECT_MAIN and not Settings.IGNORE_MISSING_MAIN:
                cmd += ['--entry=main']
            else:
                cmd += ['--no-entry']
        # Without memory growth the maximum equals the initial size; with
        # growth a maximum is only passed when MAXIMUM_MEMORY is not -1.
        if not Settings.ALLOW_MEMORY_GROWTH:
            cmd.append('--max-memory=%d' % Settings.INITIAL_MEMORY)
        elif Settings.MAXIMUM_MEMORY != -1:
            cmd.append('--max-memory=%d' % Settings.MAXIMUM_MEMORY)
        if not Settings.RELOCATABLE:
            cmd.append('--global-base=%s' % Settings.GLOBAL_BASE)

    return cmd

def link_lld(args, target, external_symbol_list=None):
    """Link with wasm-ld, writing the output to `target`.

    Args:
      args: linker inputs and flags; --start-group/--end-group are dropped.
            The caller's list is not modified.
      target: output file path.
      external_symbol_list: forwarded to lld_flags_for_executable() when the
            output is not relocatable.
    """
    if not os.path.exists(WASM_LD):
        exit_with_error('linker binary not found in LLVM directory: %s', WASM_LD)

    # lld doesn't currently support --start-group/--end-group since the
    # semantics are more like the windows linker where there is no need for
    # grouping.
    linker_args = [arg for arg in args if arg not in ('--start-group', '--end-group')]

    # Emscripten currently expects linkable output (SIDE_MODULE/MAIN_MODULE) to
    # include all archive contents.
    if Settings.LINKABLE:
        linker_args = ['--whole-archive'] + linker_args + ['--no-whole-archive']

    if Settings.STRICT:
        linker_args.append('--fatal-warnings')

    cmd = [WASM_LD, '-o', target]
    cmd.extend(linker_args)
    for backend_arg in llvm_backend_args():
        cmd.extend(['-mllvm', backend_arg])

    # For relocatable output (generating an object file) we don't pass any of the
    # normal linker flags that are used when building an executable
    producing_object = '--relocatable' in linker_args or '-r' in linker_args
    if not producing_object:
        cmd.extend(lld_flags_for_executable(external_symbol_list))

    print_compiler_stage(cmd)
    check_call(get_command_with_possible_response_file(cmd))

def link_bitcode(files, target, force_archive_contents=False):
    """Link bitcode objects and archives into `target` using llvm-link.

    Archive members are pulled in following GNU ld-like rules: a member is
    added when it defines a currently unresolved symbol, or unconditionally
    inside --whole-archive, when `force_archive_contents` is set, or when
    `files` has exactly one entry.

    Args:
      files: input paths mixed with the supported flags (--start-group / -(,
             --end-group / -), --whole-archive, --no-whole-archive).
      target: output path; deleted before linking.
      force_archive_contents: add every member of every archive.
    """
    # "Full-featured" linking: looks into archives (duplicates lld functionality)
    actual_files = []
    # Tracking unresolveds is necessary for .a linking, see below.
    # Specify all possible entry points to seed the linking process.
    # For a simple application, this would just be "main".
    # (func[1:] drops the first character of each exported name.)
    unresolved_symbols = set([func[1:] for func in Settings.EXPORTED_FUNCTIONS])
    resolved_symbols = set()
    # Paths of already included object files from archives.
    added_contents = set()
    has_ar = False
    for f in files:
        if not f.startswith('-'):
            has_ar = has_ar or is_ar(make_paths_absolute(f))

    # If we have only one archive or the force_archive_contents flag is set,
    # then we will add every object file we see, regardless of whether it
    # resolves any undefined symbols.
    force_add_all = len(files) == 1 or force_archive_contents

    # Considers an object file for inclusion in the link. The object is included
    # if force_add=True or if the object provides a currently undefined symbol.
    # If the object is included, the symbol tables are updated and the function
    # returns True.
    def consider_object(f, force_add=False):
        new_symbols = llvm_nm(f)
        # Check if the object was valid according to llvm-nm. It also accepts
        # native object files.
        if not new_symbols.is_valid_for_nm():
            diagnostics.warning('emcc', 'object %s is not valid according to llvm-nm, cannot link', f)
            return False
        # Check the object is valid for us, and not a native object file.
        if not is_bitcode(f):
            exit_with_error('unknown file type: %s', f)
        provided = new_symbols.defs.union(new_symbols.commons)
        do_add = force_add or not unresolved_symbols.isdisjoint(provided)
        if do_add:
            logger.debug('adding object %s to link (forced: %d)' % (f, force_add))
            # Update resolved_symbols table with newly resolved symbols
            resolved_symbols.update(provided)
            # Update unresolved_symbols table by adding newly unresolved symbols and
            # removing newly resolved symbols.
            unresolved_symbols.update(new_symbols.undefs.difference(resolved_symbols))
            unresolved_symbols.difference_update(provided)
            actual_files.append(f)
        return do_add

    # Traverse a single archive. The object files are repeatedly scanned for
    # newly satisfied symbols until no new symbols are found. Returns true if
    # any object files were added to the link.
    def consider_archive(f, force_add):
        added_any_objects = False
        loop_again = True
        logger.debug('considering archive %s' % (f))
        contents = ar_contents[f]
        while loop_again: # repeatedly traverse until we have everything we need
            loop_again = False
            for content in contents:
                if content in added_contents:
                    continue
                # Link in the .o if it provides symbols, *or* this is a singleton archive (which is
                # apparently an exception in gcc ld)
                if consider_object(content, force_add=force_add):
                    added_contents.add(content)
                    loop_again = True
                    added_any_objects = True
        logger.debug('done running loop of archive %s' % (f))
        return added_any_objects

    # Populates ar_contents and the nm cache used by the helpers above.
    read_link_inputs([x for x in files if not x.startswith('-')])

    # Rescan a group of archives until we don't find any more objects to link.
    def scan_archive_group(group):
        loop_again = True
        logger.debug('starting archive group loop')
        while loop_again:
            loop_again = False
            for archive in group:
                if consider_archive(archive, force_add=False):
                    loop_again = True
        logger.debug('done with archive group loop')

    current_archive_group = None
    in_whole_archive = False
    for f in files:
        absolute_path_f = make_paths_absolute(f)
        if f.startswith('-'):
            if f in ['--start-group', '-(']:
                assert current_archive_group is None, 'Nested --start-group, missing --end-group?'
                current_archive_group = []
            elif f in ['--end-group', '-)']:
                assert current_archive_group is not None, '--end-group without --start-group'
                scan_archive_group(current_archive_group)
                current_archive_group = None
            elif f in ['--whole-archive', '-whole-archive']:
                in_whole_archive = True
            elif f in ['--no-whole-archive', '-no-whole-archive']:
                in_whole_archive = False
            else:
                # Command line flags should already be vetted by the time this method
                # is called, so this is an internal error
                assert False, 'unsupported link flag: ' + f
        elif is_ar(absolute_path_f):
            # Extract object files from ar archives, and link according to gnu ld semantics
            # (link in an entire .o from the archive if it supplies symbols still unresolved)
            consider_archive(absolute_path_f, in_whole_archive or force_add_all)
            # If we're inside a --start-group/--end-group section, add to the list
            # so we can loop back around later.
            if current_archive_group is not None:
                current_archive_group.append(absolute_path_f)
        elif is_bitcode(absolute_path_f):
            if has_ar:
                consider_object(f, force_add=True)
            else:
                # If there are no archives then we can simply link all valid object
                # files and skip the symbol table stuff.
                actual_files.append(f)
        else:
            exit_with_error('unknown file type: %s', f)

    # We have to consider the possibility that --start-group was used without a matching
    # --end-group; GNU ld permits this behavior and implicitly treats the end of the
    # command line as having an --end-group.
    if current_archive_group:
        logger.debug('--start-group without matching --end-group, rescanning')
        scan_archive_group(current_archive_group)
        current_archive_group = None

    try_delete(target)

    # Finish link
    # tolerate people trying to link a.so a.so etc.
    actual_files = unique_ordered(actual_files)

    logger.debug('emcc: linking: %s to %s', actual_files, target)
    link_llvm(actual_files, target)

def get_command_with_possible_response_file(cmd):
    """Return `cmd`, or a response-file form of it when it is too long.

    Commands whose space-joined length is at most 8192 characters are
    returned unchanged (same object). Longer ones become
    [cmd[0], '@<file>'] with the remaining arguments written to a response
    file in TEMP_DIR.
    """
    # 8k is a bit of an arbitrary limit, but a reasonable one
    # for max command line size before we use a response file
    joined_length = len(' '.join(cmd))
    if joined_length > 8192:
        logger.debug('using response file for %s' % cmd[0])
        rsp_path = response_file.create_response_file(cmd[1:], TEMP_DIR)
        return [cmd[0], "@" + rsp_path]
    return cmd

def parse_symbols(output, include_internal=False):
    """Parse llvm-nm text output into an ObjectFileInfo.

    Args:
      output: the text printed by llvm-nm.
      include_internal: when False every symbol whose status letter is upper
        case (other than U and C) counts as defined; when True only the
        statuses W, t, T, d and D do.

    Returns:
      ObjectFileInfo(0, None, defs, undefs, commons) with sets of symbol names.

    Empty lines, whitespace-only lines, lines starting with '#' and lines
    containing ':' are skipped.
    """
    defs = set()
    undefs = set()
    commons = set()
    offset_pattern = re.compile(r'^[\da-f]{8}$')
    for line in output.split('\n'):
        if not line or line[0] == '#':
            continue
        # e.g. filename.o: , saying which file it's from
        if ':' in line:
            continue
        parts = [seg for seg in line.split(' ') if seg]
        if not parts:
            # A line made only of spaces has no fields; indexing parts[0]
            # below used to raise IndexError for it.
            continue
        # pnacl-nm will print zero offsets for bitcode, and newer llvm-nm will print present symbols
        # as -------- T name
        # The parentheses spell out how `and`/`or` already grouped here.
        if (len(parts) == 3 and parts[0] == "--------") or offset_pattern.match(parts[0]):
            parts.pop(0)
        if len(parts) != 2:
            continue
        status, symbol = parts
        if status == 'U':
            undefs.add(symbol)
        elif status == 'C':
            commons.add(symbol)
        elif (not include_internal and status == status.upper()) or \
             (include_internal and status in ('W', 't', 'T', 'd', 'D')):
            # FIXME: using WTD in the previous line fails due to llvm-nm behavior on macOS,
            # so for now we assume all uppercase are normally defined external symbols
            defs.add(symbol)
    return ObjectFileInfo(0, None, defs, undefs, commons)

def llvm_nm_uncached(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
    """Run llvm-nm on `filename` and return an ObjectFileInfo, without caching.

    On a non-zero exit code the returned info carries that code and the
    stringified stdout and stderr concatenated; otherwise the parsed symbols.
    """
    # LLVM binary ==> list of symbols
    nm_run = run_process([LLVM_NM, filename], stdout=stdout, stderr=stderr, check=False)
    if nm_run.returncode != 0:
        failure_text = str(nm_run.stdout) + str(nm_run.stderr)
        return ObjectFileInfo(nm_run.returncode, failure_text)
    return parse_symbols(nm_run.stdout, include_internal)

def llvm_nm(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
  """Return the symbol information for `filename`, memoized per absolute path.

  Results for `include_internal=True` live in `internal_nm_cache`, all others
  in `uninternal_nm_cache`.  A cache miss is resolved via `llvm_nm_uncached`,
  and the result is stored whether or not llvm-nm succeeded.
  """
  # Always use absolute paths to maximize cache usage
  abs_path = os.path.abspath(filename)

  cache = internal_nm_cache if include_internal else uninternal_nm_cache
  if abs_path in cache:
    return cache[abs_path]

  info = llvm_nm_uncached(abs_path, stdout, stderr, include_internal)
  if info.returncode != 0:
    logger.debug('llvm-nm failed on file ' + abs_path + ': return code ' + str(info.returncode) + ', error: ' + info.output)

  # Even if we fail, write the results to the NM cache so that we don't keep
  # trying to llvm-nm the failing file again later.
  cache[abs_path] = info
  return info

def emcc(filename, args=None, output_filename=None, stdout=None, stderr=None, env=None):
  """Run EMCC on `filename`, writing the result to `output_filename`.

  Args:
    filename: input file passed to EMCC.
    args: optional list of extra command line arguments, placed between the
      input file and `-o`.  Defaults to no extra arguments.
    output_filename: where to write the output; defaults to `filename + '.o'`.
    stdout, stderr, env: forwarded unchanged to `run_process`.

  Any existing file at the output path is removed (via `try_delete`) before
  the command runs.
  """
  # A None sentinel instead of a shared mutable `[]` default.
  extra_args = [] if args is None else args
  if output_filename is None:
    output_filename = filename + '.o'
  try_delete(output_filename)
  run_process([EMCC, filename] + extra_args + ['-o', output_filename], stdout=stdout, stderr=stderr, env=env)

def emar(action, output_filename, filenames, stdout=None, stderr=None, env=None):
  """Run EMAR with `action` on `output_filename`, passing `filenames` via a response file.

  Any existing `output_filename` is removed first.  The response file is
  deleted again once EMAR has run, whether or not the run raised.  When
  `action` contains 'c', the output file is asserted to exist afterwards.
  """
  try_delete(output_filename)
  rsp_path = response_file.create_response_file(filenames, TEMP_DIR)
  archive_cmd = [EMAR, action, output_filename, '@' + rsp_path]
  try:
    run_process(archive_cmd, stdout=stdout, stderr=stderr, env=env)
  finally:
    try_delete(rsp_path)

  creates_archive = 'c' in action
  if creates_archive:
    assert os.path.exists(output_filename), 'emar could not create output file: ' + output_filename

def get_safe_internalize():
  """Build the `-internalize` arguments that keep Settings.EXPORTED_FUNCTIONS public.

  Returns an empty list when Settings.LINKABLE is set (nothing is
  internalized).  Otherwise returns `['-internalize', <option>]`, where the
  option names the exports either inline (`...-list=a,b,c`) or, when the
  comma-joined list exceeds 8192 characters, through a temporary file
  (`...-file=<path>`) holding one name per line.
  """
  if Settings.LINKABLE:
    return [] # do not internalize anything

  exported = Settings.EXPORTED_FUNCTIONS
  joined_names = ','.join([demangle_c_symbol_name(name) for name in exported])

  # EXPORTED_FUNCTIONS can potentially be very large.
  # 8k is a bit of an arbitrary limit, but a reasonable one
  # for max command line size before we use a response file
  if len(joined_names) <= 8192:
    option = '-internalize-public-api-' + 'list=' + joined_names
  else:
    logger.debug('using response file for EXPORTED_FUNCTIONS in internalize')
    # NOTE(review): this branch drops the first character of each name, while
    # the inline branch uses demangle_c_symbol_name - confirm they agree.
    file_contents = '\n'.join([name[1:] for name in exported])
    list_path = configuration.get_temp_files().get('.response').name
    with open(list_path, 'w') as f:
      f.write(file_contents)
    option = '-internalize-public-api-' + 'file=' + list_path

  # internalize carefully, llvm 3.2 will remove even main if not told not to
  return ['-internalize', option]

def opt_level_to_str(opt_level, shrink_level=0):
  """Convert an opt_level/shrink_level pair to a flag string such as '-O1'.

  An `opt_level` of 0 always yields '-O0'.  Otherwise a `shrink_level` of 1
  yields '-Os', 2 or more yields '-Oz', and anything else yields '-O'
  followed by `opt_level` capped at 3.
  """
  if opt_level == 0:
    return '-O0'
  # The two shrink cases are disjoint, so the order of these checks is free.
  if shrink_level >= 2:
    return '-Oz'
  if shrink_level == 1:
    return '-Os'
  capped_level = min(opt_level, 3)
  return '-O{}'.format(capped_level)

def js_optimizer(filename, passes):
  """Run the `js_optimizer` module's `run(filename, passes)` and return its result.

  A `subprocess.CalledProcessError` from the run is reported through
  `exit_with_error`, naming the failed command and its return code.
  """
  # Imported lazily under an alias so the module is not confused with this
  # function, which shares its name.
  from . import js_optimizer as js_optimizer_module

  try:
    optimized = js_optimizer_module.run(filename, passes)
  except subprocess.CalledProcessError as err:
    exit_with_error("'%s' failed (%d)", ' '.join(err.cmd), err.returncode)
  else:
    return optimized

# run JS optimizer on some JS, ignoring asm.js contents if any - just run on it all
def acorn_optimizer(filename, passes, extra_info=None, return_output=False):
  """Run tools/acorn-optimizer.js under node on `filename` with the given passes.

  Args:
    filename: path of the JS file to process.
    passes: list of pass names appended to the command line; when output goes
      to a file, `passes[0]` also names the saved intermediate.
    extra_info: optional string.  When given, the input is copied to a
      temporary `.js` file, `// EXTRA_INFO: <extra_info>` is appended to the
      copy, and the copy is processed instead of the original.
    return_output: when true, return the optimizer's captured stdout instead
      of writing a file.

  Returns:
    The captured stdout if `return_output` is true, otherwise the path of the
    output file, `<filename>.jso.js` (based on the original `filename`, not
    the temporary copy).
  """
  optimizer = path_from_root('tools', 'acorn-optimizer.js')
  original_filename = filename
  if extra_info is not None:
    temp = configuration.get_temp_files().get('.js').name
    shutil.copyfile(filename, temp)
    with open(temp, 'a') as f:
      f.write('// EXTRA_INFO: ' + extra_info)
    filename = temp
  cmd = config.NODE_JS + [optimizer, filename] + passes
  # Keep JS code comments intact through the acorn optimization pass so that JSDoc comments
  # will be carried over to a later Closure run.
  if Settings.USE_CLOSURE_COMPILER:
    cmd += ['--closureFriendly']
  if return_output:
    return check_call(cmd, stdout=PIPE).stdout

  output_filename = original_filename + '.jso.js'
  configuration.get_temp_files().note(output_filename)
  # Close the output file deterministically instead of leaking the handle.
  with open(output_filename, 'w') as out:
    check_call(cmd, stdout=out)
  save_intermediate(output_filename, '%s.js' % passes[0])
  return output_filename

# Entry point for ctor evalling. Currently a no-op: it only emits a debug
# message saying the feature is disabled, and returns None.
def eval_ctors(js_file, binary_file, debug_info=False): # noqa
  """Log that ctor evalling is disabled; none of the arguments are used."""
  logger.debug('Ctor evalling in the wasm backend is disabled due to https://github.com/emscripten-core/emscripten/issues/9527')
  # TODO re-enable
  # cmd = [PYTHON, path_from_root('tools', 'ctor_evaller.py'), js_file, binary_file, str(Settings.INITIAL_MEMORY), str(Settings.TOTAL_STACK), str(Settings.GLOBAL_BASE), binaryen_bin, str(int(debug_info))]
  # if binaryen_bin:
  #   cmd += get_binaryen_feature_flags()
  # print_compiler_stage(cmd)
  # check_call(cmd)

def get_closure_compiler():
  """Return the command used to launch Closure compiler.

  A truthy `config.CLOSURE_COMPILER` is returned as-is.  Otherwise the
  command comes from `shared.get_npm_cmd('google-closure-compiler')`; on
  non-Windows hosts `--max_old_space_size=8192` is inserted just before its
  last element.
  """
  # First check if the user configured a specific CLOSURE_COMPILER in their settings
  if config.CLOSURE_COMPILER:
    # Return the same value that was just tested, rather than looking the
    # setting up again through a different module.
    return config.CLOSURE_COMPILER

  # Otherwise use the one installed via npm
  cmd = shared.get_npm_cmd('google-closure-compiler')
  if not WINDOWS:
    # Work around an issue that Closure compiler can take up a lot of memory and crash in an error
    # "FATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap
    # out of memory"
    cmd.insert(-1, '--max_old_space_size=8192')
  return cmd

def check_closure_compiler(cmd, args, env, allowed_to_fail):
  """Check that `cmd + args + ['--version']` runs and prints a Closure version.

  Args:
    cmd: list forming the base Closure compiler command.
    args: list of extra arguments placed before `--version`.
    env: environment passed to `run_process`.
    allowed_to_fail: when true, a failed check returns False; when false, it
      is reported through `exit_with_error`.

  Returns:
    True if the command ran and its stdout contains 'Version:'; False if the
    check failed and `allowed_to_fail` is true.
  """
  try:
    output = run_process(cmd + args + ['--version'], stdout=PIPE, env=env).stdout
  except Exception as e:
    if allowed_to_fail:
      return False
    # Logger.warn is a deprecated alias; warning() is the supported spelling.
    logger.warning(str(e))
    exit_with_error('closure compiler ("%s --version") did not execute properly!' % str(cmd))

  if 'Version:' not in output:
    if allowed_to_fail:
      return False
    exit_with_error('unrecognized closure compiler --version output (%s):\n%s' % (str(cmd), output))

  return True

def closure_compiler(filename, pretty=True, advanced=True, extra_closure_args=None):

with ToolchainProfiler.profile_block('closure_compiler'):

env = shared.env_with_node_in_path()

user_args = []

env_args = os.environ.get('EMCC_CLOSURE_ARGS')

if env_args:

user_args += shlex.split(env_args)

if extra_closure_args:

user_args += extra_closure_args

# Closure compiler expects JAVA_HOME to be set *and* java.exe to be in the PATH in order

# to enable use the java backend. Without this it will only try the native and JavaScript

# versions of the compiler.

java_bin = os.path.dirname(config.JAVA)

if java_bin:

def add_to_path(dirname):

env['PATH'] = env['PATH'] + os.pathsep + dirname

add_to_path(java_bin)

java_home = os.path.dirname(java_bin)

env.setdefault('JAVA_HOME', java_home)

closure_cmd = get_closure_compiler()

native_closure_compiler_works = check_closure_compiler(closure_cmd, user_args, env, allowed_to_fail=True)

if not native_closure_compiler_works and not any(a.startswith('--platform') for a in user_args):

# Run with Java Closure compiler as a fallback if the native version does not work

user_args.append('--platform=java')

check_closure_compiler(closure_cmd, user_args, env, allowed_to_fail=False)

# Closure externs file contains known symbols to be extern to the minification, Closure

# should not minify these symbol names.

CLOSURE_EXTERNS = [path_from_root('src', 'closure-externs', 'closure-externs.js')]

# Closure compiler needs to know about all exports that come from the asm.js/wasm module, because to optimize for small code size,

# the exported symbols are added to global scope via a foreach loop in a way that evades Closure's static analysis. With an explicit

# externs file for the exports, Closure is able to reason about the exports.

if Settings.MODULE_EXPORTS and not Settings.DECLARE_ASM_MODULE_EXPORTS:

# Generate an exports file that records all the exported symbols from asm.js/wasm module.

module_exports_suppressions = '\n'.join(['/**\n * @suppress {duplicate, undefinedVars}\n */\nvar %s;\n' % i for i, j in Settings.MODULE_EXPORTS])

exports_file = configuration.get_temp_files().get('_module_exports.js')

exports_file.write(module_exports_suppressions.encode())

exports_file.close()

CLOSURE_EXTERNS += [exports_file.name]

# Node.js specific externs

if Settings.target_environment_may_be('node'):

NODE_EXTERNS_BASE = path_from_root('third_party', 'closure-compiler', 'node-externs')

NODE_EXTERNS = os.listdir(NODE_EXTERNS_BASE)

NODE_EXTERNS = [os.path.join(NODE_EXTERNS_BASE, name) for name in NODE_EXTERNS

if name.endswith('.js')]

CLOSURE_EXTERNS += [path_from_root('src', 'closure-externs', 'node-externs.js')] + NODE_EXTERNS

# V8/SpiderMonkey shell specific externs

if Settings.target_environment_may_be('shell'):

V8_EXTERNS = [path_from_root('src', 'closure-externs', 'v8-externs.js')]

SPIDERMONKEY_EXTERNS = [path_from_root('src', 'closure-externs', 'spidermonkey-externs.js')]

CLOSURE_EXTERNS += V8_EXTERNS + SPIDERMONKEY_EXTERNS

# Web environment specific externs

if Settings.target_environment_may_be('web') or Settings.target_environment_may_be('worker'):

BROWSER_EXTERNS_BASE = path_from_root('src', 'closure-externs', 'browser-externs')

if os.path.isdir(BROWSER_EXTERNS_BASE):

BROWSER_EXTERNS = os.listdir(BROWSER_EXTERNS_BASE)

BROWSER_EXTERNS = [os.path.join(BROWSER_EXTERNS_BASE, name) for name in BROWSER_EXTERNS

if name.endswith('.js')]

CLOSURE_EXTERNS += BROWSER_EXTERNS

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

building.py

Latest commit

History

building.py

File metadata and controls