Add write_dir argument to csv_to_wfdb. Fixes #67. (#492)

tompollard · web-flow · commit 6a0de803b4d3 · 2024-07-11T14:28:06.000-04:00
As discussed in #490, https://github.com/MIT-LCP/wfdb-python/blob/34b989e08435c1a82d31bdd2800c4c14147e3e93/wfdb/io/convert/csv.py#L10 currently "strips the path from the input .csv, then writes the output to .dat and .hea". It's inconvenient not to be able to specify the output directory. This pull request adds a new `write_dir` argument to the `csv_to_wfdb` function. By default `write_dir` is set to an empty string, which maintains backwards compatibility. Setting `write_dir` to a directory means that the output files are saved to that directory. I have marked this as a WIP because I haven't tested the new behaviour (other than running `pytest`). @jshaffer94247, if you have an opportunity to test the fix, I'd appreciate your feedback.
diff --git a/tests/io/test_convert.py b/tests/io/test_convert.py
@@ -1,14 +1,22 @@
+import os
+import shutil
+import unittest
+
 import numpy as np
 
 from wfdb.io.record import rdrecord
 from wfdb.io.convert.edf import read_edf
+from wfdb.io.convert.csv import csv_to_wfdb
+
 
+class TestEdfToWfdb:
+    """
+    Tests for the io.convert.edf module.
+    """
 
-class TestConvert:
     def test_edf_uniform(self):
         """
         EDF format conversion to MIT for uniform sample rates.
-
         """
         # Uniform sample rates
         record_MIT = rdrecord("sample-data/n16").__dict__
@@ -60,7 +68,6 @@ def test_edf_uniform(self):
     def test_edf_non_uniform(self):
         """
         EDF format conversion to MIT for non-uniform sample rates.
-
         """
         # Non-uniform sample rates
         record_MIT = rdrecord("sample-data/wave_4").__dict__
@@ -108,3 +115,65 @@ def test_edf_non_uniform(self):
 
         target_results = len(fields) * [True]
         assert np.array_equal(test_results, target_results)
+
+
+class TestCsvToWfdb(unittest.TestCase):
+    """
+    Tests for the io.convert.csv module.
+    """
+
+    def setUp(self):
+        """
+        Create a temporary directory containing data for testing.
+
+        Load 100.dat file for comparison to 100.csv file.
+        """
+        self.test_dir = "test_output"
+        os.makedirs(self.test_dir, exist_ok=True)
+
+        self.record_100_csv = "sample-data/100.csv"
+        self.record_100_dat = rdrecord("sample-data/100", physical=True)
+
+    def tearDown(self):
+        """
+        Remove the temporary directory after the test.
+        """
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+
+    def test_write_dir(self):
+        """
+        Call the function with the write_dir argument.
+        """
+        csv_to_wfdb(
+            file_name=self.record_100_csv,
+            fs=360,
+            units="mV",
+            write_dir=self.test_dir,
+        )
+
+        # Check if the output files are created in the specified directory
+        base_name = os.path.splitext(os.path.basename(self.record_100_csv))[0]
+        expected_dat_file = os.path.join(self.test_dir, f"{base_name}.dat")
+        expected_hea_file = os.path.join(self.test_dir, f"{base_name}.hea")
+
+        self.assertTrue(os.path.exists(expected_dat_file))
+        self.assertTrue(os.path.exists(expected_hea_file))
+
+        # Check that newly written file matches the 100.dat file
+        record_write = rdrecord(os.path.join(self.test_dir, base_name))
+
+        self.assertEqual(record_write.fs, 360)
+        self.assertEqual(record_write.fs, self.record_100_dat.fs)
+        self.assertEqual(record_write.units, ["mV", "mV"])
+        self.assertEqual(record_write.units, self.record_100_dat.units)
+        self.assertEqual(record_write.sig_name, ["MLII", "V5"])
+        self.assertEqual(record_write.sig_name, self.record_100_dat.sig_name)
+        self.assertEqual(record_write.p_signal.size, 1300000)
+        self.assertEqual(
+            record_write.p_signal.size, self.record_100_dat.p_signal.size
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/wfdb/io/convert/csv.py b/wfdb/io/convert/csv.py
@@ -33,6 +33,7 @@ def csv_to_wfdb(
     header=True,
     delimiter=",",
     verbose=False,
+    write_dir="",
 ):
     """
     Read a WFDB header file and return either a `Record` object with the
@@ -235,6 +236,10 @@ def csv_to_wfdb(
     verbose : bool, optional
         Whether to print all the information read about the file (True) or
         not (False).
+    write_dir : str, optional
+        The directory where the output files will be saved. If write_dir is not
+        provided, the output files will be saved in the current working
+        directory, named after the base name of the input file.
 
     Returns
     -------
@@ -291,6 +296,7 @@ def csv_to_wfdb(
         df_CSV = pd.read_csv(file_name, delimiter=delimiter, header=None)
     if verbose:
         print("Successfully read CSV")
+
     # Extract the entire signal from the dataframe
     p_signal = df_CSV.values
     # The dataframe should be in (`sig_len`, `n_sig`) dimensions
@@ -300,10 +306,11 @@ def csv_to_wfdb(
     n_sig = p_signal.shape[1]
     if verbose:
         print("Number of signals: {}".format(n_sig))
+
     # Check if signal names are valid and set defaults
     if not sig_name:
         if header:
-            sig_name = df_CSV.columns.to_list()
+            sig_name = df_CSV.columns.tolist()
             if any(map(str.isdigit, sig_name)):
                 print(
                     "WARNING: One or more of your signal names are numbers, this "
@@ -318,15 +325,12 @@ def csv_to_wfdb(
             if verbose:
                 print("Signal names: {}".format(sig_name))
 
-    # Set the output header file name to be the same, remove path
-    if os.sep in file_name:
-        file_name = file_name.split(os.sep)[-1]
-    record_name = file_name.replace(".csv", "")
+    record_name = os.path.splitext(os.path.basename(file_name))[0]
     if verbose:
-        print("Output header: {}.hea".format(record_name))
+        print("Record name: {}.hea".format(record_name))
 
     # Replace the CSV file tag with DAT
-    dat_file_name = file_name.replace(".csv", ".dat")
+    dat_file_name = record_name + ".dat"
     dat_file_name = [dat_file_name] * n_sig
     if verbose:
         print("Output record: {}".format(dat_file_name[0]))
@@ -450,7 +454,6 @@ def csv_to_wfdb(
         if verbose:
             print("Record generated successfully")
         return record
-
     else:
         # Write the information to a record and header file
         wrsamp(
@@ -465,6 +468,7 @@ def csv_to_wfdb(
             comments=comments,
             base_time=base_time,
             base_date=base_date,
+            write_dir=write_dir,
         )
         if verbose:
             print("File generated successfully")