ANDROID: Re-enable fast mremap and fix UAF with SPF SPF attempts page faults without taking the mmap lock, but takes the PTL. If there is a concurrent fast mremap (at PMD/PUD level), this can lead to a UAF as fast mremap will only take the PTL locks at the PMD/PUD level. SPF cannot take the PTL locks at the larger subtree granularity since this introduces much contention in the page fault paths. To address the race: 1) Only try fast mremaps if there are no users of the VMA. Android is concerned with this optimization in the context of GC stop-the-world pause. So there are no other threads active and this should almost always succeed. 2) Speculative faults detect ongoing fast mremaps and fallback to conventional fault handling (taking mmap read lock). Bug: 263177905 Bug: 264693867 Change-Id: I23917e493ddc8576de19883cac053dfde9982b7f Signed-off-by: Kalesh Singh <kaleshsingh@google.com>

commit: 0e5c0e91bbeed6ba42c11d5e82aa71e85d2e45e4 [log] [tgz]
author: Kalesh Singh <kaleshsingh@google.com> Mon Dec 19 21:07:49 2022 -0800
committer: Kalesh Singh <kaleshsingh@google.com> Tue Jan 10 08:40:35 2023 -0800
tree: c52205b4d2007e96c9ee609f70ca231d529dd720
parent: 1e4abab8b49ceef6e422100bbc6fdb345a4274ed [diff]
diff --git a/mm/mmap.c b/mm/mmap.c
index 29b373b..69c0965 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c

@@ -2416,8 +2416,22 @@
 
 	read_lock(&mm->mm_rb_lock);
 	vma = __find_vma(mm, addr);
-	if (vma)
-		atomic_inc(&vma->vm_ref_count);
+
+	/*
+	 * If there is a concurrent fast mremap, bail out since the entire
+	 * PMD/PUD subtree may have been remapped.
+	 *
+	 * This is usually safe for conventional mremap since it takes the
+	 * PTE locks as does SPF. However fast mremap only takes the lock
+	 * at the PMD/PUD level which is ok as it is done with the mmap
+	 * write lock held. But since SPF, as the term implies forgoes,
+	 * taking the mmap read lock and also cannot take PTL lock at the
+	 * larger PMD/PUD granualrity, since it would introduce huge
+	 * contention in the page fault path; fall back to regular fault
+	 * handling.
+	 */
+	if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count))
+		vma = NULL;
 	read_unlock(&mm->mm_rb_lock);
 
 	return vma;

diff --git a/mm/mremap.c b/mm/mremap.c
index 6abbaf1..a6484e8 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c

@@ -210,11 +210,39 @@
 		drop_rmap_locks(vma);
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
+{
+	/*
+	 * If we have the only reference, swap the refcount to -1. This
+	 * will prevent other concurrent references by get_vma() for SPFs.
+	 */
+	return atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1;
+}
+
 /*
- * Speculative page fault handlers will not detect page table changes done
- * without ptl locking.
+ * Restore the VMA reference count to 1 after a fast mremap.
  */
-#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
+static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
+{
+	/*
+	 * This should only be called after a corresponding,
+	 * successful trylock_vma_ref_count().
+	 */
+	VM_BUG_ON_VMA(atomic_cmpxchg(&vma->vm_ref_count, -1, 1) != -1,
+		      vma);
+}
+#else	/* !CONFIG_SPECULATIVE_PAGE_FAULT */
+static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
+{
+	return true;
+}
+static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
+{
+}
+#endif	/* CONFIG_SPECULATIVE_PAGE_FAULT */
+
+#ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
 {
@@ -249,6 +277,14 @@
 		return false;
 
 	/*
+	 * We hold both exclusive mmap_lock and rmap_lock at this point and
+	 * cannot block. If we cannot immediately take exclusive ownership
+	 * of the VMA fallback to the move_ptes().
+	 */
+	if (!trylock_vma_ref_count(vma))
+		return false;
+
+	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
 	 */
@@ -270,6 +306,7 @@
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 
+	unlock_vma_ref_count(vma);
 	return true;
 }
 #else
@@ -281,11 +318,7 @@
 }
 #endif
 
-/*
- * Speculative page fault handlers will not detect page table changes done
- * without ptl locking.
- */
-#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
+#ifdef CONFIG_HAVE_MOVE_PUD
 static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
 {
@@ -301,6 +334,14 @@
 		return false;
 
 	/*
+	 * We hold both exclusive mmap_lock and rmap_lock at this point and
+	 * cannot block. If we cannot immediately take exclusive ownership
+	 * of the VMA fallback to the move_ptes().
+	 */
+	if (!trylock_vma_ref_count(vma))
+		return false;
+
+	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
 	 */
@@ -322,6 +363,7 @@
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 
+	unlock_vma_ref_count(vma);
 	return true;
 }
 #else
commit	0e5c0e91bbeed6ba42c11d5e82aa71e85d2e45e4	[log] [tgz]
author	Kalesh Singh <kaleshsingh@google.com>	Mon Dec 19 21:07:49 2022 -0800
committer	Kalesh Singh <kaleshsingh@google.com>	Tue Jan 10 08:40:35 2023 -0800
tree	c52205b4d2007e96c9ee609f70ca231d529dd720
parent	1e4abab8b49ceef6e422100bbc6fdb345a4274ed [diff]