Index: llvm/lib/Target/X86/X86FrameLowering.cpp
--- llvm/lib/Target/X86/X86FrameLowering.cpp.orig
+++ llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -15,6 +15,7 @@
 #include "X86InstrBuilder.h"
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
+#include "X86ReturnProtectorLowering.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
@@ -49,7 +50,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget 
                                    MaybeAlign StackAlignOverride)
     : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                           STI.is64Bit() ? -8 : -4),
-      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
+      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()), RPL() {
   // Cache a bunch of frame-related predicates for this subtarget.
   SlotSize = TRI->getSlotSize();
   Is64Bit = STI.is64Bit();
@@ -57,6 +58,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget 
   // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
   Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
   StackPtr = TRI->getStackRegister();
+  SaveArgs = Is64Bit ? STI.getSaveArgs() : 0;
 }
 
 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
@@ -102,7 +104,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF
           MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
           MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
           MFI.hasStackMap() || MFI.hasPatchPoint() ||
-          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
+          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()) ||
+          SaveArgs);
 }
 
 static unsigned getSUBriOpcode(bool IsLP64) {
@@ -1405,6 +1408,24 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasic
   }
 }
 
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
+                                                const X86Subtarget &Subtarget) {
+  assert(Subtarget.is64Bit());
+
+  if (Subtarget.isCallingConvWin64(CallConv)) {
+    static const MCPhysReg GPR64ArgRegsWin64[] = {
+      X86::RCX, X86::RDX, X86::R8,  X86::R9
+    };
+    return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
+  }
+
+  static const MCPhysReg GPR64ArgRegs64Bit[] = {
+    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+  };
+  return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
+}
+
 bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
   // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
   // clobbered by any interrupt handler.
@@ -1802,6 +1823,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &M
               .addReg(StackPtr)
               .setMIFlag(MachineInstr::FrameSetup);
 
+      if (SaveArgs && !Fn.arg_empty()) {
+        ArrayRef<MCPhysReg> GPRs =
+          get64BitArgumentGPRs(Fn.getCallingConv(), STI);
+        unsigned arg_size = Fn.arg_size();
+        unsigned RI = 0;
+        int64_t SaveSize = 0;
+
+        if (Fn.hasStructRetAttr()) {
+          GPRs = GPRs.drop_front(1);
+          arg_size--;
+        }
+
+        for (MCPhysReg Reg : GPRs) {
+          if (++RI > arg_size)
+            break;
+
+          SaveSize += SlotSize;
+
+          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+            .addReg(Reg)
+            .setMIFlag(MachineInstr::FrameSetup);
+        }
+
+        // Realign the stack. PUSHes are the most space efficient.
+        while (SaveSize % getStackAlignment()) {
+          BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+            .addReg(GPRs.front())
+            .setMIFlag(MachineInstr::FrameSetup);
+
+          SaveSize += SlotSize;
+        }
+
+        //dlg StackSize -= SaveSize;
+        //dlg MFI.setStackSize(StackSize);
+        X86FI->setSaveArgSize(SaveSize);
+      }
+
         if (NeedsDwarfCFI) {
           if (ArgBaseReg.isValid()) {
             SmallString<64> CfaExpr;
@@ -2361,10 +2419,17 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M
       int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
       emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
     }
-    // Pop EBP.
-    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+
+    if (X86FI->getSaveArgSize()) {
+      // LEAVE is effectively mov rbp,rsp; pop rbp
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    } else {
+      // Pop EBP.
+      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
             MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
+    }
 
     // We need to reset FP to its untagged state on return. Bit 60 is currently
     // used to show the presence of an extended frame.
@@ -2405,6 +2470,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M
     if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
       if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
           (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::LEAVE64 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
           (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
           (Opc != X86::ADD64ri32 || !PI->getFlag(MachineInstr::FrameDestroy)))
         break;
@@ -2588,6 +2654,9 @@ StackOffset X86FrameLowering::getFrameIndexReference(c
            "FPDelta isn't aligned per the Win64 ABI!");
   }
 
+  if (FI >= 0)
+    Offset -= X86FI->getSaveArgSize();
+
   if (FrameReg == TRI->getFramePtr()) {
     // Skip saved EBP/RBP
     Offset += SlotSize;
@@ -4052,6 +4121,10 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(Machine
   addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
                     UnwindHelpFI)
       .addImm(-2);
+}
+
+const ReturnProtectorLowering *X86FrameLowering::getReturnProtector() const {
+  return &RPL;
 }
 
 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
