1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
use std::cmp;
use std::collections::BTreeSet;
use std::time::{Duration, Instant};

use itertools::Itertools;
use rustc_ast::expand::allocator::{global_fn_name, AllocatorKind, ALLOCATOR_METHODS};
use rustc_attr as attr;
use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
use rustc_data_structures::profiling::{get_resident_set_size, print_time_passes_entry};
use rustc_data_structures::sync::par_map;
use rustc_data_structures::unord::UnordMap;
use rustc_hir::def_id::{DefId, LOCAL_CRATE};
use rustc_hir::lang_items::LangItem;
use rustc_metadata::EncodedMetadata;
use rustc_middle::bug;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
use rustc_middle::middle::debugger_visualizer::{DebuggerVisualizerFile, DebuggerVisualizerType};
use rustc_middle::middle::exported_symbols::SymbolExportKind;
use rustc_middle::middle::{exported_symbols, lang_items};
use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder, MonoItem};
use rustc_middle::mir::BinOp;
use rustc_middle::query::Providers;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, TyAndLayout};
use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
use rustc_session::config::{self, CrateType, EntryFnType, OptLevel, OutputType};
use rustc_session::Session;
use rustc_span::symbol::sym;
use rustc_span::{Symbol, DUMMY_SP};
use rustc_target::abi::FIRST_VARIANT;
use tracing::{debug, info};

use crate::assert_module_sources::CguReuse;
use crate::back::link::are_upstream_rust_objects_already_included;
use crate::back::metadata::create_compressed_metadata_file;
use crate::back::write::{
    compute_per_cgu_lto_type, start_async_codegen, submit_codegened_module_to_llvm,
    submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm, ComputedLtoType, OngoingCodegen,
};
use crate::common::{self, IntPredicate, RealPredicate, TypeKind};
use crate::mir::operand::OperandValue;
use crate::mir::place::PlaceRef;
use crate::traits::*;
use crate::{
    errors, meth, mir, CachedModuleCodegen, CompiledModule, CrateInfo, ModuleCodegen, ModuleKind,
};

pub fn bin_op_to_icmp_predicate(op: BinOp, signed: bool) -> IntPredicate {
    match op {
        BinOp::Eq => IntPredicate::IntEQ,
        BinOp::Ne => IntPredicate::IntNE,
        BinOp::Lt => {
            if signed {
                IntPredicate::IntSLT
            } else {
                IntPredicate::IntULT
            }
        }
        BinOp::Le => {
            if signed {
                IntPredicate::IntSLE
            } else {
                IntPredicate::IntULE
            }
        }
        BinOp::Gt => {
            if signed {
                IntPredicate::IntSGT
            } else {
                IntPredicate::IntUGT
            }
        }
        BinOp::Ge => {
            if signed {
                IntPredicate::IntSGE
            } else {
                IntPredicate::IntUGE
            }
        }
        op => bug!(
            "comparison_op_to_icmp_predicate: expected comparison operator, \
             found {:?}",
            op
        ),
    }
}

pub fn bin_op_to_fcmp_predicate(op: BinOp) -> RealPredicate {
    match op {
        BinOp::Eq => RealPredicate::RealOEQ,
        BinOp::Ne => RealPredicate::RealUNE,
        BinOp::Lt => RealPredicate::RealOLT,
        BinOp::Le => RealPredicate::RealOLE,
        BinOp::Gt => RealPredicate::RealOGT,
        BinOp::Ge => RealPredicate::RealOGE,
        op => {
            bug!(
                "comparison_op_to_fcmp_predicate: expected comparison operator, \
                 found {:?}",
                op
            );
        }
    }
}

pub fn compare_simd_types<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    lhs: Bx::Value,
    rhs: Bx::Value,
    t: Ty<'tcx>,
    ret_ty: Bx::Type,
    op: BinOp,
) -> Bx::Value {
    let signed = match t.kind() {
        ty::Float(_) => {
            let cmp = bin_op_to_fcmp_predicate(op);
            let cmp = bx.fcmp(cmp, lhs, rhs);
            return bx.sext(cmp, ret_ty);
        }
        ty::Uint(_) => false,
        ty::Int(_) => true,
        _ => bug!("compare_simd_types: invalid SIMD type"),
    };

    let cmp = bin_op_to_icmp_predicate(op, signed);
    let cmp = bx.icmp(cmp, lhs, rhs);
    // LLVM outputs an `< size x i1 >`, so we need to perform a sign extension
    // to get the correctly sized type. This will compile to a single instruction
    // once the IR is converted to assembly if the SIMD instruction is supported
    // by the target architecture.
    bx.sext(cmp, ret_ty)
}

/// Retrieves the information we are losing (making dynamic) in an unsizing
/// adjustment.
///
/// The `old_info` argument is a bit odd. It is intended for use in an upcast,
/// where the new vtable for an object will be derived from the old one.
pub fn unsized_info<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    source: Ty<'tcx>,
    target: Ty<'tcx>,
    old_info: Option<Bx::Value>,
) -> Bx::Value {
    let cx = bx.cx();
    let (source, target) =
        cx.tcx().struct_lockstep_tails_for_codegen(source, target, bx.param_env());
    match (source.kind(), target.kind()) {
        (&ty::Array(_, len), &ty::Slice(_)) => {
            cx.const_usize(len.eval_target_usize(cx.tcx(), ty::ParamEnv::reveal_all()))
        }
        (&ty::Dynamic(data_a, _, src_dyn_kind), &ty::Dynamic(data_b, _, target_dyn_kind))
            if src_dyn_kind == target_dyn_kind =>
        {
            let old_info =
                old_info.expect("unsized_info: missing old info for trait upcasting coercion");
            if data_a.principal_def_id() == data_b.principal_def_id() {
                // A NOP cast that doesn't actually change anything, should be allowed even with invalid vtables.
                return old_info;
            }

            // trait upcasting coercion

            let vptr_entry_idx = cx.tcx().supertrait_vtable_slot((source, target));

            if let Some(entry_idx) = vptr_entry_idx {
                let ptr_size = bx.data_layout().pointer_size;
                let ptr_align = bx.data_layout().pointer_align.abi;
                let vtable_byte_offset = u64::try_from(entry_idx).unwrap() * ptr_size.bytes();
                let gep = bx.inbounds_ptradd(old_info, bx.const_usize(vtable_byte_offset));
                let new_vptr = bx.load(bx.type_ptr(), gep, ptr_align);
                bx.nonnull_metadata(new_vptr);
                // VTable loads are invariant.
                bx.set_invariant_load(new_vptr);
                new_vptr
            } else {
                old_info
            }
        }
        (_, ty::Dynamic(data, _, _)) => meth::get_vtable(cx, source, data.principal()),
        _ => bug!("unsized_info: invalid unsizing {:?} -> {:?}", source, target),
    }
}

/// Coerces `src` to `dst_ty`. `src_ty` must be a pointer.
pub fn unsize_ptr<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    src: Bx::Value,
    src_ty: Ty<'tcx>,
    dst_ty: Ty<'tcx>,
    old_info: Option<Bx::Value>,
) -> (Bx::Value, Bx::Value) {
    debug!("unsize_ptr: {:?} => {:?}", src_ty, dst_ty);
    match (src_ty.kind(), dst_ty.kind()) {
        (&ty::Ref(_, a, _), &ty::Ref(_, b, _) | &ty::RawPtr(b, _))
        | (&ty::RawPtr(a, _), &ty::RawPtr(b, _)) => {
            assert_eq!(bx.cx().type_is_sized(a), old_info.is_none());
            (src, unsized_info(bx, a, b, old_info))
        }
        (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) => {
            assert_eq!(def_a, def_b); // implies same number of fields
            let src_layout = bx.cx().layout_of(src_ty);
            let dst_layout = bx.cx().layout_of(dst_ty);
            if src_ty == dst_ty {
                return (src, old_info.unwrap());
            }
            let mut result = None;
            for i in 0..src_layout.fields.count() {
                let src_f = src_layout.field(bx.cx(), i);
                if src_f.is_1zst() {
                    // We are looking for the one non-1-ZST field; this is not it.
                    continue;
                }

                assert_eq!(src_layout.fields.offset(i).bytes(), 0);
                assert_eq!(dst_layout.fields.offset(i).bytes(), 0);
                assert_eq!(src_layout.size, src_f.size);

                let dst_f = dst_layout.field(bx.cx(), i);
                assert_ne!(src_f.ty, dst_f.ty);
                assert_eq!(result, None);
                result = Some(unsize_ptr(bx, src, src_f.ty, dst_f.ty, old_info));
            }
            result.unwrap()
        }
        _ => bug!("unsize_ptr: called on bad types"),
    }
}

/// Coerces `src` to `dst_ty` which is guaranteed to be a `dyn*` type.
pub fn cast_to_dyn_star<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    src: Bx::Value,
    src_ty_and_layout: TyAndLayout<'tcx>,
    dst_ty: Ty<'tcx>,
    old_info: Option<Bx::Value>,
) -> (Bx::Value, Bx::Value) {
    debug!("cast_to_dyn_star: {:?} => {:?}", src_ty_and_layout.ty, dst_ty);
    assert!(
        matches!(dst_ty.kind(), ty::Dynamic(_, _, ty::DynStar)),
        "destination type must be a dyn*"
    );
    let src = match bx.cx().type_kind(bx.cx().backend_type(src_ty_and_layout)) {
        TypeKind::Pointer => src,
        TypeKind::Integer => bx.inttoptr(src, bx.type_ptr()),
        // FIXME(dyn-star): We probably have to do a bitcast first, then inttoptr.
        kind => bug!("unexpected TypeKind for left-hand side of `dyn*` cast: {kind:?}"),
    };
    (src, unsized_info(bx, src_ty_and_layout.ty, dst_ty, old_info))
}

/// Coerces `src`, which is a reference to a value of type `src_ty`,
/// to a value of type `dst_ty`, and stores the result in `dst`.
pub fn coerce_unsized_into<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    src: PlaceRef<'tcx, Bx::Value>,
    dst: PlaceRef<'tcx, Bx::Value>,
) {
    let src_ty = src.layout.ty;
    let dst_ty = dst.layout.ty;
    match (src_ty.kind(), dst_ty.kind()) {
        (&ty::Ref(..), &ty::Ref(..) | &ty::RawPtr(..)) | (&ty::RawPtr(..), &ty::RawPtr(..)) => {
            let (base, info) = match bx.load_operand(src).val {
                OperandValue::Pair(base, info) => unsize_ptr(bx, base, src_ty, dst_ty, Some(info)),
                OperandValue::Immediate(base) => unsize_ptr(bx, base, src_ty, dst_ty, None),
                OperandValue::Ref(..) | OperandValue::ZeroSized => bug!(),
            };
            OperandValue::Pair(base, info).store(bx, dst);
        }

        (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) => {
            assert_eq!(def_a, def_b); // implies same number of fields

            for i in def_a.variant(FIRST_VARIANT).fields.indices() {
                let src_f = src.project_field(bx, i.as_usize());
                let dst_f = dst.project_field(bx, i.as_usize());

                if dst_f.layout.is_zst() {
                    // No data here, nothing to copy/coerce.
                    continue;
                }

                if src_f.layout.ty == dst_f.layout.ty {
                    bx.typed_place_copy(dst_f.val, src_f.val, src_f.layout);
                } else {
                    coerce_unsized_into(bx, src_f, dst_f);
                }
            }
        }
        _ => bug!("coerce_unsized_into: invalid coercion {:?} -> {:?}", src_ty, dst_ty,),
    }
}

/// Returns `rhs` sufficiently masked, truncated, and/or extended so that it can be used to shift
/// `lhs`: it has the same size as `lhs`, and the value, when interpreted unsigned (no matter its
/// type), will not exceed the size of `lhs`.
///
/// Shifts in MIR are all allowed to have mismatched LHS & RHS types, and signed RHS.
/// The shift methods in `BuilderMethods`, however, are fully homogeneous
/// (both parameters and the return type are all the same size) and assume an unsigned RHS.
///
/// If `is_unchecked` is false, this masks the RHS to ensure it stays in-bounds,
/// as the `BuilderMethods` shifts are UB for out-of-bounds shift amounts.
/// For 32- and 64-bit types, this matches the semantics
/// of Java. (See related discussion on #1877 and #10183.)
///
/// If `is_unchecked` is true, this does no masking, and adds sufficient `assume`
/// calls or operation flags to preserve as much freedom to optimize as possible.
pub fn build_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    lhs: Bx::Value,
    mut rhs: Bx::Value,
    is_unchecked: bool,
) -> Bx::Value {
    // Shifts may have any size int on the rhs
    let mut rhs_llty = bx.cx().val_ty(rhs);
    let mut lhs_llty = bx.cx().val_ty(lhs);

    let mask = common::shift_mask_val(bx, lhs_llty, rhs_llty, false);
    if !is_unchecked {
        rhs = bx.and(rhs, mask);
    }

    if bx.cx().type_kind(rhs_llty) == TypeKind::Vector {
        rhs_llty = bx.cx().element_type(rhs_llty)
    }
    if bx.cx().type_kind(lhs_llty) == TypeKind::Vector {
        lhs_llty = bx.cx().element_type(lhs_llty)
    }
    let rhs_sz = bx.cx().int_width(rhs_llty);
    let lhs_sz = bx.cx().int_width(lhs_llty);
    if lhs_sz < rhs_sz {
        if is_unchecked && bx.sess().opts.optimize != OptLevel::No {
            // FIXME: Use `trunc nuw` once that's available
            let inrange = bx.icmp(IntPredicate::IntULE, rhs, mask);
            bx.assume(inrange);
        }

        bx.trunc(rhs, lhs_llty)
    } else if lhs_sz > rhs_sz {
        // We zero-extend even if the RHS is signed. So e.g. `(x: i32) << -1i8` will zero-extend the
        // RHS to `255i32`. But then we mask the shift amount to be within the size of the LHS
        // anyway so the result is `31` as it should be. All the extra bits introduced by zext
        // are masked off so their value does not matter.
        // FIXME: if we ever support 512bit integers, this will be wrong! For such large integers,
        // the extra bits introduced by zext are *not* all masked away any more.
        assert!(lhs_sz <= 256);
        bx.zext(rhs, lhs_llty)
    } else {
        rhs
    }
}

// Returns `true` if this session's target will use native wasm
// exceptions. This means that the VM does the unwinding for
// us
pub fn wants_wasm_eh(sess: &Session) -> bool {
    sess.target.is_like_wasm && sess.target.os != "emscripten"
}

/// Returns `true` if this session's target will use SEH-based unwinding.
///
/// This is only true for MSVC targets, and even then the 64-bit MSVC target
/// currently uses SEH-ish unwinding with DWARF info tables to the side (same as
/// 64-bit MinGW) instead of "full SEH".
pub fn wants_msvc_seh(sess: &Session) -> bool {
    sess.target.is_like_msvc
}

/// Returns `true` if this session's target requires the new exception
/// handling LLVM IR instructions (catchpad / cleanuppad / ... instead
/// of landingpad)
pub fn wants_new_eh_instructions(sess: &Session) -> bool {
    wants_wasm_eh(sess) || wants_msvc_seh(sess)
}

pub fn codegen_instance<'a, 'tcx: 'a, Bx: BuilderMethods<'a, 'tcx>>(
    cx: &'a Bx::CodegenCx,
    instance: Instance<'tcx>,
) {
    // this is an info! to allow collecting monomorphization statistics
    // and to allow finding the last function before LLVM aborts from
    // release builds.
    info!("codegen_instance({})", instance);

    mir::codegen_mir::<Bx>(cx, instance);
}

/// Creates the `main` function which will initialize the rust runtime and call
/// users main function.
pub fn maybe_create_entry_wrapper<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    cx: &'a Bx::CodegenCx,
) -> Option<Bx::Function> {
    let (main_def_id, entry_type) = cx.tcx().entry_fn(())?;
    let main_is_local = main_def_id.is_local();
    let instance = Instance::mono(cx.tcx(), main_def_id);

    if main_is_local {
        // We want to create the wrapper in the same codegen unit as Rust's main
        // function.
        if !cx.codegen_unit().contains_item(&MonoItem::Fn(instance)) {
            return None;
        }
    } else if !cx.codegen_unit().is_primary() {
        // We want to create the wrapper only when the codegen unit is the primary one
        return None;
    }

    let main_llfn = cx.get_fn_addr(instance);

    let entry_fn = create_entry_fn::<Bx>(cx, main_llfn, main_def_id, entry_type);
    return Some(entry_fn);

    fn create_entry_fn<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
        cx: &'a Bx::CodegenCx,
        rust_main: Bx::Value,
        rust_main_def_id: DefId,
        entry_type: EntryFnType,
    ) -> Bx::Function {
        // The entry function is either `int main(void)` or `int main(int argc, char **argv)`, or
        // `usize efi_main(void *handle, void *system_table)` depending on the target.
        let llfty = if cx.sess().target.os.contains("uefi") {
            cx.type_func(&[cx.type_ptr(), cx.type_ptr()], cx.type_isize())
        } else if cx.sess().target.main_needs_argc_argv {
            cx.type_func(&[cx.type_int(), cx.type_ptr()], cx.type_int())
        } else {
            cx.type_func(&[], cx.type_int())
        };

        let main_ret_ty = cx.tcx().fn_sig(rust_main_def_id).no_bound_vars().unwrap().output();
        // Given that `main()` has no arguments,
        // then its return type cannot have
        // late-bound regions, since late-bound
        // regions must appear in the argument
        // listing.
        let main_ret_ty = cx.tcx().normalize_erasing_regions(
            ty::ParamEnv::reveal_all(),
            main_ret_ty.no_bound_vars().unwrap(),
        );

        let Some(llfn) = cx.declare_c_main(llfty) else {
            // FIXME: We should be smart and show a better diagnostic here.
            let span = cx.tcx().def_span(rust_main_def_id);
            cx.tcx().dcx().emit_fatal(errors::MultipleMainFunctions { span });
        };

        // `main` should respect same config for frame pointer elimination as rest of code
        cx.set_frame_pointer_type(llfn);
        cx.apply_target_cpu_attr(llfn);

        let llbb = Bx::append_block(cx, llfn, "top");
        let mut bx = Bx::build(cx, llbb);

        bx.insert_reference_to_gdb_debug_scripts_section_global();

        let isize_ty = cx.type_isize();
        let ptr_ty = cx.type_ptr();
        let (arg_argc, arg_argv) = get_argc_argv(cx, &mut bx);

        let (start_fn, start_ty, args, instance) = if let EntryFnType::Main { sigpipe } = entry_type
        {
            let start_def_id = cx.tcx().require_lang_item(LangItem::Start, None);
            let start_instance = ty::Instance::expect_resolve(
                cx.tcx(),
                ty::ParamEnv::reveal_all(),
                start_def_id,
                cx.tcx().mk_args(&[main_ret_ty.into()]),
                DUMMY_SP,
            );
            let start_fn = cx.get_fn_addr(start_instance);

            let i8_ty = cx.type_i8();
            let arg_sigpipe = bx.const_u8(sigpipe);

            let start_ty = cx.type_func(&[cx.val_ty(rust_main), isize_ty, ptr_ty, i8_ty], isize_ty);
            (
                start_fn,
                start_ty,
                vec![rust_main, arg_argc, arg_argv, arg_sigpipe],
                Some(start_instance),
            )
        } else {
            debug!("using user-defined start fn");
            let start_ty = cx.type_func(&[isize_ty, ptr_ty], isize_ty);
            (rust_main, start_ty, vec![arg_argc, arg_argv], None)
        };

        let result = bx.call(start_ty, None, None, start_fn, &args, None, instance);
        if cx.sess().target.os.contains("uefi") {
            bx.ret(result);
        } else {
            let cast = bx.intcast(result, cx.type_int(), true);
            bx.ret(cast);
        }

        llfn
    }
}

/// Obtain the `argc` and `argv` values to pass to the rust start function.
fn get_argc_argv<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    cx: &'a Bx::CodegenCx,
    bx: &mut Bx,
) -> (Bx::Value, Bx::Value) {
    if cx.sess().target.os.contains("uefi") {
        // Params for UEFI
        let param_handle = bx.get_param(0);
        let param_system_table = bx.get_param(1);
        let ptr_size = bx.tcx().data_layout.pointer_size;
        let ptr_align = bx.tcx().data_layout.pointer_align.abi;
        let arg_argc = bx.const_int(cx.type_isize(), 2);
        let arg_argv = bx.alloca(2 * ptr_size, ptr_align);
        bx.store(param_handle, arg_argv, ptr_align);
        let arg_argv_el1 = bx.inbounds_ptradd(arg_argv, bx.const_usize(ptr_size.bytes()));
        bx.store(param_system_table, arg_argv_el1, ptr_align);
        (arg_argc, arg_argv)
    } else if cx.sess().target.main_needs_argc_argv {
        // Params from native `main()` used as args for rust start function
        let param_argc = bx.get_param(0);
        let param_argv = bx.get_param(1);
        let arg_argc = bx.intcast(param_argc, cx.type_isize(), true);
        let arg_argv = param_argv;
        (arg_argc, arg_argv)
    } else {
        // The Rust start function doesn't need `argc` and `argv`, so just pass zeros.
        let arg_argc = bx.const_int(cx.type_int(), 0);
        let arg_argv = bx.const_null(cx.type_ptr());
        (arg_argc, arg_argv)
    }
}

/// This function returns all of the debugger visualizers specified for the
/// current crate as well as all upstream crates transitively that match the
/// `visualizer_type` specified.
pub fn collect_debugger_visualizers_transitive(
    tcx: TyCtxt<'_>,
    visualizer_type: DebuggerVisualizerType,
) -> BTreeSet<DebuggerVisualizerFile> {
    tcx.debugger_visualizers(LOCAL_CRATE)
        .iter()
        .chain(
            tcx.crates(())
                .iter()
                .filter(|&cnum| {
                    let used_crate_source = tcx.used_crate_source(*cnum);
                    used_crate_source.rlib.is_some() || used_crate_source.rmeta.is_some()
                })
                .flat_map(|&cnum| tcx.debugger_visualizers(cnum)),
        )
        .filter(|visualizer| visualizer.visualizer_type == visualizer_type)
        .cloned()
        .collect::<BTreeSet<_>>()
}

/// Decide allocator kind to codegen. If `Some(_)` this will be the same as
/// `tcx.allocator_kind`, but it may be `None` in more cases (e.g. if using
/// allocator definitions from a dylib dependency).
pub fn allocator_kind_for_codegen(tcx: TyCtxt<'_>) -> Option<AllocatorKind> {
    // If the crate doesn't have an `allocator_kind` set then there's definitely
    // no shim to generate. Otherwise we also check our dependency graph for all
    // our output crate types. If anything there looks like its a `Dynamic`
    // linkage, then it's already got an allocator shim and we'll be using that
    // one instead. If nothing exists then it's our job to generate the
    // allocator!
    let any_dynamic_crate = tcx.dependency_formats(()).iter().any(|(_, list)| {
        use rustc_middle::middle::dependency_format::Linkage;
        list.iter().any(|&linkage| linkage == Linkage::Dynamic)
    });
    if any_dynamic_crate { None } else { tcx.allocator_kind(()) }
}

pub fn codegen_crate<B: ExtraBackendMethods>(
    backend: B,
    tcx: TyCtxt<'_>,
    target_cpu: String,
    metadata: EncodedMetadata,
    need_metadata_module: bool,
) -> OngoingCodegen<B> {
    // Skip crate items and just output metadata in -Z no-codegen mode.
    if tcx.sess.opts.unstable_opts.no_codegen || !tcx.sess.opts.output_types.should_codegen() {
        let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, metadata, None);

        ongoing_codegen.codegen_finished(tcx);

        ongoing_codegen.check_for_errors(tcx.sess);

        return ongoing_codegen;
    }

    let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);

    // Run the monomorphization collector and partition the collected items into
    // codegen units.
    let codegen_units = tcx.collect_and_partition_mono_items(()).1;

    // Force all codegen_unit queries so they are already either red or green
    // when compile_codegen_unit accesses them. We are not able to re-execute
    // the codegen_unit query from just the DepNode, so an unknown color would
    // lead to having to re-execute compile_codegen_unit, possibly
    // unnecessarily.
    if tcx.dep_graph.is_fully_enabled() {
        for cgu in codegen_units {
            tcx.ensure().codegen_unit(cgu.name());
        }
    }

    let metadata_module = need_metadata_module.then(|| {
        // Emit compressed metadata object.
        let metadata_cgu_name =
            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("metadata")).to_string();
        tcx.sess.time("write_compressed_metadata", || {
            let file_name =
                tcx.output_filenames(()).temp_path(OutputType::Metadata, Some(&metadata_cgu_name));
            let data = create_compressed_metadata_file(
                tcx.sess,
                &metadata,
                &exported_symbols::metadata_symbol_name(tcx),
            );
            if let Err(error) = std::fs::write(&file_name, data) {
                tcx.dcx().emit_fatal(errors::MetadataObjectFileWrite { error });
            }
            CompiledModule {
                name: metadata_cgu_name,
                kind: ModuleKind::Metadata,
                object: Some(file_name),
                dwarf_object: None,
                bytecode: None,
                assembly: None,
                llvm_ir: None,
            }
        })
    });

    let ongoing_codegen =
        start_async_codegen(backend.clone(), tcx, target_cpu, metadata, metadata_module);

    // Codegen an allocator shim, if necessary.
    if let Some(kind) = allocator_kind_for_codegen(tcx) {
        let llmod_id =
            cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("allocator")).to_string();
        let module_llvm = tcx.sess.time("write_allocator_module", || {
            backend.codegen_allocator(
                tcx,
                &llmod_id,
                kind,
                // If allocator_kind is Some then alloc_error_handler_kind must
                // also be Some.
                tcx.alloc_error_handler_kind(()).unwrap(),
            )
        });

        ongoing_codegen.wait_for_signal_to_codegen_item();
        ongoing_codegen.check_for_errors(tcx.sess);

        // These modules are generally cheap and won't throw off scheduling.
        let cost = 0;
        submit_codegened_module_to_llvm(
            &backend,
            &ongoing_codegen.coordinator.sender,
            ModuleCodegen { name: llmod_id, module_llvm, kind: ModuleKind::Allocator },
            cost,
        );
    }

    // For better throughput during parallel processing by LLVM, we used to sort
    // CGUs largest to smallest. This would lead to better thread utilization
    // by, for example, preventing a large CGU from being processed last and
    // having only one LLVM thread working while the rest remained idle.
    //
    // However, this strategy would lead to high memory usage, as it meant the
    // LLVM-IR for all of the largest CGUs would be resident in memory at once.
    //
    // Instead, we can compromise by ordering CGUs such that the largest and
    // smallest are first, second largest and smallest are next, etc. If there
    // are large size variations, this can reduce memory usage significantly.
    let codegen_units: Vec<_> = {
        let mut sorted_cgus = codegen_units.iter().collect::<Vec<_>>();
        sorted_cgus.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));

        let (first_half, second_half) = sorted_cgus.split_at(sorted_cgus.len() / 2);
        first_half.iter().interleave(second_half.iter().rev()).copied().collect()
    };

    // Calculate the CGU reuse
    let cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
        codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, cgu)).collect::<Vec<_>>()
    });

    crate::assert_module_sources::assert_module_sources(tcx, &|cgu_reuse_tracker| {
        for (i, cgu) in codegen_units.iter().enumerate() {
            let cgu_reuse = cgu_reuse[i];
            cgu_reuse_tracker.set_actual_reuse(cgu.name().as_str(), cgu_reuse);
        }
    });

    let mut total_codegen_time = Duration::new(0, 0);
    let start_rss = tcx.sess.opts.unstable_opts.time_passes.then(|| get_resident_set_size());

    // The non-parallel compiler can only translate codegen units to LLVM IR
    // on a single thread, leading to a staircase effect where the N LLVM
    // threads have to wait on the single codegen threads to generate work
    // for them. The parallel compiler does not have this restriction, so
    // we can pre-load the LLVM queue in parallel before handing off
    // coordination to the OnGoingCodegen scheduler.
    //
    // This likely is a temporary measure. Once we don't have to support the
    // non-parallel compiler anymore, we can compile CGUs end-to-end in
    // parallel and get rid of the complicated scheduling logic.
    let mut pre_compiled_cgus = if tcx.sess.threads() > 1 {
        tcx.sess.time("compile_first_CGU_batch", || {
            // Try to find one CGU to compile per thread.
            let cgus: Vec<_> = cgu_reuse
                .iter()
                .enumerate()
                .filter(|&(_, reuse)| reuse == &CguReuse::No)
                .take(tcx.sess.threads())
                .collect();

            // Compile the found CGUs in parallel.
            let start_time = Instant::now();

            let pre_compiled_cgus = par_map(cgus, |(i, _)| {
                let module = backend.compile_codegen_unit(tcx, codegen_units[i].name());
                (i, module)
            });

            total_codegen_time += start_time.elapsed();

            pre_compiled_cgus
        })
    } else {
        FxHashMap::default()
    };

    for (i, cgu) in codegen_units.iter().enumerate() {
        ongoing_codegen.wait_for_signal_to_codegen_item();
        ongoing_codegen.check_for_errors(tcx.sess);

        let cgu_reuse = cgu_reuse[i];

        match cgu_reuse {
            CguReuse::No => {
                let (module, cost) = if let Some(cgu) = pre_compiled_cgus.remove(&i) {
                    cgu
                } else {
                    let start_time = Instant::now();
                    let module = backend.compile_codegen_unit(tcx, cgu.name());
                    total_codegen_time += start_time.elapsed();
                    module
                };
                // This will unwind if there are errors, which triggers our `AbortCodegenOnDrop`
                // guard. Unfortunately, just skipping the `submit_codegened_module_to_llvm` makes
                // compilation hang on post-monomorphization errors.
                tcx.dcx().abort_if_errors();

                submit_codegened_module_to_llvm(
                    &backend,
                    &ongoing_codegen.coordinator.sender,
                    module,
                    cost,
                );
            }
            CguReuse::PreLto => {
                submit_pre_lto_module_to_llvm(
                    &backend,
                    tcx,
                    &ongoing_codegen.coordinator.sender,
                    CachedModuleCodegen {
                        name: cgu.name().to_string(),
                        source: cgu.previous_work_product(tcx),
                    },
                );
            }
            CguReuse::PostLto => {
                submit_post_lto_module_to_llvm(
                    &backend,
                    &ongoing_codegen.coordinator.sender,
                    CachedModuleCodegen {
                        name: cgu.name().to_string(),
                        source: cgu.previous_work_product(tcx),
                    },
                );
            }
        }
    }

    ongoing_codegen.codegen_finished(tcx);

    // Since the main thread is sometimes blocked during codegen, we keep track
    // -Ztime-passes output manually.
    if tcx.sess.opts.unstable_opts.time_passes {
        let end_rss = get_resident_set_size();

        print_time_passes_entry(
            "codegen_to_LLVM_IR",
            total_codegen_time,
            start_rss.unwrap(),
            end_rss,
            tcx.sess.opts.unstable_opts.time_passes_format,
        );
    }

    ongoing_codegen.check_for_errors(tcx.sess);
    ongoing_codegen
}

/// Returns whether a call from the current crate to the [`Instance`] would produce a call
/// from `compiler_builtins` to a symbol the linker must resolve.
///
/// Such calls from `compiler_bultins` are effectively impossible for the linker to handle. Some
/// linkers will optimize such that dead calls to unresolved symbols are not an error, but this is
/// not guaranteed. So we used this function in codegen backends to ensure we do not generate any
/// unlinkable calls.
///
/// Note that calls to LLVM intrinsics are uniquely okay because they won't make it to the linker.
pub fn is_call_from_compiler_builtins_to_upstream_monomorphization<'tcx>(
    tcx: TyCtxt<'tcx>,
    instance: Instance<'tcx>,
) -> bool {
    fn is_llvm_intrinsic(tcx: TyCtxt<'_>, def_id: DefId) -> bool {
        if let Some(name) = tcx.codegen_fn_attrs(def_id).link_name {
            name.as_str().starts_with("llvm.")
        } else {
            false
        }
    }

    let def_id = instance.def_id();
    !def_id.is_local()
        && tcx.is_compiler_builtins(LOCAL_CRATE)
        && !is_llvm_intrinsic(tcx, def_id)
        && !tcx.should_codegen_locally(instance)
}

impl CrateInfo {
    pub fn new(tcx: TyCtxt<'_>, target_cpu: String) -> CrateInfo {
        let crate_types = tcx.crate_types().to_vec();
        let exported_symbols = crate_types
            .iter()
            .map(|&c| (c, crate::back::linker::exported_symbols(tcx, c)))
            .collect();
        let linked_symbols =
            crate_types.iter().map(|&c| (c, crate::back::linker::linked_symbols(tcx, c))).collect();
        let local_crate_name = tcx.crate_name(LOCAL_CRATE);
        let crate_attrs = tcx.hir().attrs(rustc_hir::CRATE_HIR_ID);
        let subsystem = attr::first_attr_value_str_by_name(crate_attrs, sym::windows_subsystem);
        let windows_subsystem = subsystem.map(|subsystem| {
            if subsystem != sym::windows && subsystem != sym::console {
                tcx.dcx().emit_fatal(errors::InvalidWindowsSubsystem { subsystem });
            }
            subsystem.to_string()
        });

        // This list is used when generating the command line to pass through to
        // system linker. The linker expects undefined symbols on the left of the
        // command line to be defined in libraries on the right, not the other way
        // around. For more info, see some comments in the add_used_library function
        // below.
        //
        // In order to get this left-to-right dependency ordering, we use the reverse
        // postorder of all crates putting the leaves at the right-most positions.
        let mut compiler_builtins = None;
        let mut used_crates: Vec<_> = tcx
            .postorder_cnums(())
            .iter()
            .rev()
            .copied()
            .filter(|&cnum| {
                let link = !tcx.dep_kind(cnum).macros_only();
                if link && tcx.is_compiler_builtins(cnum) {
                    compiler_builtins = Some(cnum);
                    return false;
                }
                link
            })
            .collect();
        // `compiler_builtins` are always placed last to ensure that they're linked correctly.
        used_crates.extend(compiler_builtins);

        let crates = tcx.crates(());
        let n_crates = crates.len();
        let mut info = CrateInfo {
            target_cpu,
            crate_types,
            exported_symbols,
            linked_symbols,
            local_crate_name,
            compiler_builtins,
            profiler_runtime: None,
            is_no_builtins: Default::default(),
            native_libraries: Default::default(),
            used_libraries: tcx.native_libraries(LOCAL_CRATE).iter().map(Into::into).collect(),
            crate_name: UnordMap::with_capacity(n_crates),
            used_crates,
            used_crate_source: UnordMap::with_capacity(n_crates),
            dependency_formats: tcx.dependency_formats(()).clone(),
            windows_subsystem,
            natvis_debugger_visualizers: Default::default(),
        };

        info.native_libraries.reserve(n_crates);

        for &cnum in crates.iter() {
            info.native_libraries
                .insert(cnum, tcx.native_libraries(cnum).iter().map(Into::into).collect());
            info.crate_name.insert(cnum, tcx.crate_name(cnum));

            let used_crate_source = tcx.used_crate_source(cnum);
            info.used_crate_source.insert(cnum, used_crate_source.clone());
            if tcx.is_profiler_runtime(cnum) {
                info.profiler_runtime = Some(cnum);
            }
            if tcx.is_no_builtins(cnum) {
                info.is_no_builtins.insert(cnum);
            }
        }

        // Handle circular dependencies in the standard library.
        // See comment before `add_linked_symbol_object` function for the details.
        // If global LTO is enabled then almost everything (*) is glued into a single object file,
        // so this logic is not necessary and can cause issues on some targets (due to weak lang
        // item symbols being "privatized" to that object file), so we disable it.
        // (*) Native libs, and `#[compiler_builtins]` and `#[no_builtins]` crates are not glued,
        // and we assume that they cannot define weak lang items. This is not currently enforced
        // by the compiler, but that's ok because all this stuff is unstable anyway.
        let target = &tcx.sess.target;
        if !are_upstream_rust_objects_already_included(tcx.sess) {
            let missing_weak_lang_items: FxIndexSet<Symbol> = info
                .used_crates
                .iter()
                .flat_map(|&cnum| tcx.missing_lang_items(cnum))
                .filter(|l| l.is_weak())
                .filter_map(|&l| {
                    let name = l.link_name()?;
                    lang_items::required(tcx, l).then_some(name)
                })
                .collect();
            let prefix = match (target.is_like_windows, target.arch.as_ref()) {
                (true, "x86") => "_",
                (true, "arm64ec") => "#",
                _ => "",
            };

            // This loop only adds new items to values of the hash map, so the order in which we
            // iterate over the values is not important.
            #[allow(rustc::potential_query_instability)]
            info.linked_symbols
                .iter_mut()
                .filter(|(crate_type, _)| {
                    !matches!(crate_type, CrateType::Rlib | CrateType::Staticlib)
                })
                .for_each(|(_, linked_symbols)| {
                    let mut symbols = missing_weak_lang_items
                        .iter()
                        .map(|item| (format!("{prefix}{item}"), SymbolExportKind::Text))
                        .collect::<Vec<_>>();
                    symbols.sort_unstable_by(|a, b| a.0.cmp(&b.0));
                    linked_symbols.extend(symbols);
                    if tcx.allocator_kind(()).is_some() {
                        // At least one crate needs a global allocator. This crate may be placed
                        // after the crate that defines it in the linker order, in which case some
                        // linkers return an error. By adding the global allocator shim methods to
                        // the linked_symbols list, linking the generated symbols.o will ensure that
                        // circular dependencies involving the global allocator don't lead to linker
                        // errors.
                        linked_symbols.extend(ALLOCATOR_METHODS.iter().map(|method| {
                            (
                                format!("{prefix}{}", global_fn_name(method.name).as_str()),
                                SymbolExportKind::Text,
                            )
                        }));
                    }
                });
        }

        let embed_visualizers = tcx.crate_types().iter().any(|&crate_type| match crate_type {
            CrateType::Executable | CrateType::Dylib | CrateType::Cdylib => {
                // These are crate types for which we invoke the linker and can embed
                // NatVis visualizers.
                true
            }
            CrateType::ProcMacro => {
                // We could embed NatVis for proc macro crates too (to improve the debugging
                // experience for them) but it does not seem like a good default, since
                // this is a rare use case and we don't want to slow down the common case.
                false
            }
            CrateType::Staticlib | CrateType::Rlib => {
                // We don't invoke the linker for these, so we don't need to collect the NatVis for them.
                false
            }
        });

        if target.is_like_msvc && embed_visualizers {
            info.natvis_debugger_visualizers =
                collect_debugger_visualizers_transitive(tcx, DebuggerVisualizerType::Natvis);
        }

        info
    }
}

pub fn provide(providers: &mut Providers) {
    providers.backend_optimization_level = |tcx, cratenum| {
        let for_speed = match tcx.sess.opts.optimize {
            // If globally no optimisation is done, #[optimize] has no effect.
            //
            // This is done because if we ended up "upgrading" to `-O2` here, we’d populate the
            // pass manager and it is likely that some module-wide passes (such as inliner or
            // cross-function constant propagation) would ignore the `optnone` annotation we put
            // on the functions, thus necessarily involving these functions into optimisations.
            config::OptLevel::No => return config::OptLevel::No,
            // If globally optimise-speed is already specified, just use that level.
            config::OptLevel::Less => return config::OptLevel::Less,
            config::OptLevel::Default => return config::OptLevel::Default,
            config::OptLevel::Aggressive => return config::OptLevel::Aggressive,
            // If globally optimize-for-size has been requested, use -O2 instead (if optimize(size)
            // are present).
            config::OptLevel::Size => config::OptLevel::Default,
            config::OptLevel::SizeMin => config::OptLevel::Default,
        };

        let (defids, _) = tcx.collect_and_partition_mono_items(cratenum);

        let any_for_speed = defids.items().any(|id| {
            let CodegenFnAttrs { optimize, .. } = tcx.codegen_fn_attrs(*id);
            match optimize {
                attr::OptimizeAttr::None | attr::OptimizeAttr::Size => false,
                attr::OptimizeAttr::Speed => true,
            }
        });

        if any_for_speed {
            return for_speed;
        }

        tcx.sess.opts.optimize
    };
}

pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) -> CguReuse {
    if !tcx.dep_graph.is_fully_enabled() {
        return CguReuse::No;
    }

    let work_product_id = &cgu.work_product_id();
    if tcx.dep_graph.previous_work_product(work_product_id).is_none() {
        // We don't have anything cached for this CGU. This can happen
        // if the CGU did not exist in the previous session.
        return CguReuse::No;
    }

    // Try to mark the CGU as green. If it we can do so, it means that nothing
    // affecting the LLVM module has changed and we can re-use a cached version.
    // If we compile with any kind of LTO, this means we can re-use the bitcode
    // of the Pre-LTO stage (possibly also the Post-LTO version but we'll only
    // know that later). If we are not doing LTO, there is only one optimized
    // version of each module, so we re-use that.
    let dep_node = cgu.codegen_dep_node(tcx);
    assert!(
        !tcx.dep_graph.dep_node_exists(&dep_node),
        "CompileCodegenUnit dep-node for CGU `{}` already exists before marking.",
        cgu.name()
    );

    if tcx.try_mark_green(&dep_node) {
        // We can re-use either the pre- or the post-thinlto state. If no LTO is
        // being performed then we can use post-LTO artifacts, otherwise we must
        // reuse pre-LTO artifacts
        match compute_per_cgu_lto_type(
            &tcx.sess.lto(),
            &tcx.sess.opts,
            tcx.crate_types(),
            ModuleKind::Regular,
        ) {
            ComputedLtoType::No => CguReuse::PostLto,
            _ => CguReuse::PreLto,
        }
    } else {
        CguReuse::No
    }
}