Merge branch 'master' of github.com:dpeerlab/Palantir

dpeerlab · Nov 30, 2024 · e8a03c7 · e8a03c7
2 parents d5d13be + 4d0f031
commit e8a03c7
Show file tree

Hide file tree

Showing 5 changed files with 31 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -77,6 +77,10 @@ Release Notes
  * `run_magic_imputation` now has a boolean parameter `sparse` to control output sparsity
  * **bugfix**: `run_local_variability` for dense expression arrays now runs much faster and more accurately
 
+ ### Version 1.3.5rc
+ * `run_magic_imputation` now has a boolean parameter `sparse` to control output sparsity
+
+
  ### Version 1.3.4
  * avoid division by zero in `select_branch_cells` for very small datasets
  * make branch selection robust against NaNs

diff --git a/src/palantir/utils.py b/src/palantir/utils.py
@@ -588,6 +588,8 @@ def run_magic_imputation(
     expression_key: str = None,
     imputation_key: str = "MAGIC_imputed_data",
     n_jobs: int = -1,
+    sparse: bool = True,
+    clip_threshold: float = 1e-2,
 ) -> Union[pd.DataFrame, None, csr_matrix]:
     """
     Run MAGIC imputation on the data.
@@ -611,6 +613,10 @@ def run_magic_imputation(
         Key to store the imputed data in layers of data if it is a sc.AnnData object. Default is 'MAGIC_imputed_data'.
     n_jobs : int, optional
         Number of cores to use for parallel processing. If -1, all available cores are used. Default is -1.
+    sparse : bool, optional
+        If True, sets values below `clip_threshold` to 0 and returns a sparse matrix. If False, returns a dense matrix. Default is True.
+    clip_threshold : float, optional
+        Threshold value for setting values to 0 when returning a sparse matrix. Default is 1e-2. Unused if `sparse` is False.
 
     Returns
     -------
@@ -656,20 +662,31 @@ def run_magic_imputation(
 
     # Stack the results together
     if issparse(X):
-        imputed_data = hstack(res).todense()
+        imputed_data = hstack(res)
     else:
         imputed_data = np.hstack(res)
 
-    # Set small values to zero
-    imputed_data[imputed_data < 1e-2] = 0
+    # Set small values to zero if returning sparse matrix 
+    if sparse:
+        if issparse(X):
+            imputed_data.data[imputed_data.data < clip_threshold] = 0
+            imputed_data.eliminate_zeros()
+        else:
+            imputed_data = np.where(imputed_data < clip_threshold, 0, imputed_data)
+            imputed_data = csr_matrix(imputed_data)
+    else:
+        if issparse(X):
+            imputed_data = imputed_data.todense()
 
     # Clean up
     gc.collect()
 
     if isinstance(data, sc.AnnData):
-        data.layers[imputation_key] = np.asarray(imputed_data)
+        data.layers[imputation_key] = imputed_data
 
     if isinstance(data, pd.DataFrame):
+        if issparse(imputed_data):
+            imputed_data = imputed_data.toarray()
         imputed_data = pd.DataFrame(
             imputed_data, index=data.index, columns=data.columns
         )

diff --git a/src/palantir/version.py b/src/palantir/version.py
@@ -1,3 +1,3 @@
-__version__ = "1.3.4"
+__version__ = "1.3.5rc"
 __author__ = "Palantir development team"
 __author_email__ = "[email protected]"
diff --git a/tests/utils_run_magic_imputation.py b/tests/utils_run_magic_imputation.py
@@ -16,6 +16,8 @@ def mock_dm_res():
 def test_run_magic_imputation_ndarray(mock_dm_res):
     data = np.random.rand(50, 20)
     result = run_magic_imputation(data, dm_res=mock_dm_res)
+    assert isinstance(result, csr_matrix)
+    result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
     assert isinstance(result, np.ndarray)
 
 
@@ -30,6 +32,8 @@ def test_run_magic_imputation_dataframe(mock_dm_res):
 def test_run_magic_imputation_csr(mock_dm_res):
     data = csr_matrix(np.random.rand(50, 20))
     result = run_magic_imputation(data, dm_res=mock_dm_res)
+    assert isinstance(result, csr_matrix)
+    result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False)
     assert isinstance(result, np.ndarray)
 
 
@@ -39,7 +43,7 @@ def test_run_magic_imputation_anndata():
     data.obsp["DM_Similarity"] = np.random.rand(50, 50)
     result = run_magic_imputation(data)
     assert "MAGIC_imputed_data" in data.layers
-    assert isinstance(result, np.ndarray)
+    assert isinstance(result, csr_matrix)
 
 
 # Test with AnnData and custom keys

diff --git a/tests/util_run_pca.py → tests/utils_run_pca.py b/tests/util_run_pca.py → tests/utils_run_pca.py