Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tool for migrating stellarphot v1 data to stellarphot v2 #471

Merged
merged 6 commits into from
Oct 13, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions stellarphot/conftest.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little confused why you created a fixture for this rather than just creating a new and old version of the testing file... it it just to allow iteration of test cases via the fixture?

Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,26 @@ def simple_photometry_data():
good_star = good_star | (pd_input["star_id"] == an_id)

return pd_input[good_star & first_slice]


@pytest.fixture
def stellphotv1_photometry_data(two_filters):
"""
Load photometry data form version 1 of stellarphot.

By default the data has two filters, with the filter name part of the column name,
e.g. "mag_inst_B" and "mag_inst_ip".

Parameters
----------
two_filters : bool
If True, return data with two filters. If False, return data with only the "B"
filter and column name "mag_inst_B".
"""
# Grab the test photometry file and simplify it a bit.
data_file = get_pkg_data_filename("utils/tests/data/sp1-data-two-filters.csv")
data = Table.read(data_file)
if not two_filters:
data = data[data["filter"] == "B"]
del data["mag_inst_ip"]
return data
11 changes: 7 additions & 4 deletions stellarphot/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,9 @@ def __init__(
**kwargs,
)

# From this point forwarsd we should be using self to get at any data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# From this point forwarsd we should be using self to get at any data
# From this point forward we should be using self to get at any data

Same typo noted in #470

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was fixed there

# columns, because that is where BaseEnhancedTable has put the data.

# Perform input validation
if not isinstance(self.observatory, Observatory):
raise TypeError(
Expand Down Expand Up @@ -465,24 +468,24 @@ def __init__(
]
cnts_unit = self[counts_columns[0]].unit
for this_col in counts_columns[1:]:
if input_data[this_col].unit != cnts_unit:
if self[this_col].unit != cnts_unit:
raise ValueError(
f"input_data['{this_col}'] has inconsistent units "
f"with input_data['{counts_columns[0]}'] (should "
f"be {cnts_unit} but it's "
f"{input_data[this_col].unit})."
f"{self[this_col].unit})."
)
for this_col in counts_per_pixel_columns:
if cnts_unit is None:
perpixel = u.pixel**-1
else:
perpixel = cnts_unit * u.pixel**-1
if input_data[this_col].unit != perpixel:
if self[this_col].unit != perpixel:
raise ValueError(
f"input_data['{this_col}'] has inconsistent units "
f"with input_data['{counts_columns[0]}'] (should "
f"be {perpixel} but it's "
f"{input_data[this_col].unit})."
f"{self[this_col].unit})."
)

# Compute additional columns
Expand Down
17 changes: 17 additions & 0 deletions stellarphot/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,23 @@ def test_photometry_blank():
assert test_base.observatory is None


def test_photometry_with_colname_map(feder_cg_16m, feder_passbands, feder_obs):
# Rename one of the columns in the test data to something else
# and provide a colname_map that should fix it.
# Regression test for #469
this_phot_data = testphot_clean.copy()
this_phot_data.rename_column("aperture_net_cnts", "bad_column")
colname_map = {"bad_column": "aperture_net_cnts"}
pd = PhotometryData(
input_data=this_phot_data,
colname_map=colname_map,
observatory=feder_obs,
camera=feder_cg_16m,
passband_map=feder_passbands,
)
assert "bad_column" not in pd.columns


@pytest.mark.parametrize("bjd_coordinates", [None, "custom"])
def test_photometry_data(feder_cg_16m, feder_passbands, feder_obs, bjd_coordinates):
# Create photometry data instance
Expand Down
7 changes: 7 additions & 0 deletions stellarphot/utils/tests/data/sp1-data-two-filters.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id,xcenter,ycenter,aperture_sum,annulus_sum,RA,Dec,sky_per_pix_avg,sky_per_pix_med,sky_per_pix_std,fwhm_x,fwhm_y,width,aperture,aperture_area,annulus_inner,annulus_outer,annulus_area,exposure,date-obs,night,aperture_net_flux,BJD,mag_inst_B,airmass,filter,file,star_id,mag_inst_ip,mag_error,noise,noise-aij,snr
1,2060.7868674508622,2051.3477931606344,4558.72114944458,23128.44873723574,273.58534757493095,41.856267845141176,8.907166779927682,8.961146354675293,6.977002235997995,13.961973349823774,13.961973349823774,13.961973349823774,5.0,78.53981633974483,20.0,35.0,2591.8139392115795,120.0,2022-07-28T04:42:14.169,59787,3859.153906441584,2459788.699649941,-4.208505279923923,1.007,B,V0533-Her-S001-R001-C001-B_dupe-1.fit,1,,0.023016459819425385,122.71497498500277,81.80998332333517,47.17216346554141
2,1743.9566243727818,946.0920405359641,722.0966237336397,22886.43977989664,273.45195540156385,41.706531666614055,8.693135974449536,8.527806758880615,7.18608421380037,16.85386458736935,16.85386458736935,16.85386458736935,5.0,78.53981633974483,20.0,35.0,2591.8139392115795,120.0,2022-07-28T04:42:14.169,59787,39.339320883944424,2459788.699649941,0.770657821461399,1.007,B,V0533-Her-S001-R001-C001-B_dupe-1.fit,2,,1.7746956306655253,96.45343025945039,64.3022868396336,0.6117872756540456
3,3171.0984369851485,2934.0442420208533,770.3665702939034,23303.32351740077,273.8639834384531,41.93476999987169,8.99394964006205,9.075422286987305,6.880121316116393,17.092103126276708,17.092103126276708,17.092103126276708,5.0,78.53981633974483,20.0,35.0,2591.8139392115795,120.0,2022-07-28T04:42:14.169,59787,63.983417394515754,2459788.699649941,0.24255638670821864,1.007,B,V0533-Her-S001-R001-C001-B_dupe-1.fit,3,,1.095447669806021,96.83358420987028,64.55572280658019,0.9911347067745
1,2047.1738198248745,2062.844492583953,1793.4490242004395,27321.00808480289,273.5854017385322,41.856221745014764,10.386397372620442,9.995248794555664,7.854524767121713,16.35578142918151,16.35578142918151,16.35578142918151,5.0,78.53981633974483,20.0,35.0,2591.8139392115795,90.0,2022-07-28T04:35:10.800,59787,977.7032821232217,2459788.694576323,,1.005,ip,V0533-Her-S001-R001-C001-ip_dupe-1.fit,1,-3.0301395567958185,0.07726468461779071,104.36504105943891,69.57669403962593,14.052166398800027
2,1729.874997960989,957.9549052141231,765.1158067508368,24546.40877101966,273.45193160013906,41.70657516276639,9.466155877430515,9.321210384368896,7.16637812130639,16.866278034457444,16.866278034457444,16.866278034457444,5.0,78.53981633974483,20.0,35.0,2591.8139392115795,90.0,2022-07-28T04:35:10.800,59787,21.645662694048042,2459788.694576323,,1.005,ip,V0533-Her-S001-R001-C001-ip_dupe-1.fit,2,1.1069509096572534,3.232819760097885,96.67614346212044,64.45076230808029,0.3358480477015908
3,3156.634286606488,2945.927411810049,771.466923430562,24204.438097196165,273.86388066307694,41.934825013893715,9.327613381984222,9.244649887084961,7.142938029341322,16.831798604768036,16.831798604768036,16.831798604768036,5.0,78.53981633974483,20.0,35.0,2591.8139392115795,90.0,2022-07-28T04:35:10.800,59787,38.877881521375116,2459788.694576323,,1.005,ip,V0533-Her-S001-R001-C001-ip_dupe-1.fit,3,0.4711216461159047,1.8007753309289039,96.72284525475827,64.48189683650551,0.6029270761055676
104 changes: 104 additions & 0 deletions stellarphot/utils/tests/test_version_migrator.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like a through test suite as far as I can tell, kind of explains the weird fixture I noted earlier.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, was using it a few different times

Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import pytest
from astropy import units as u
from astropy.table import Table
from packaging.version import InvalidVersion

from stellarphot import PhotometryData
from stellarphot.settings import Camera, Observatory
from stellarphot.settings.constants import TEST_CAMERA_VALUES, TEST_OBSERVATORY_SETTINGS
from stellarphot.utils.version_migrator import VersionMigrator


class TestVersionMigrator:
@pytest.mark.parametrize(
"from_version, to_version",
[
(None, None),
("1", None),
(None, "2"),
("1", "2"),
],
)
def test_init(self, from_version, to_version):
# Test a few valid ways to call the constructor
vm_args = {}
result_version = {}
if from_version is not None:
vm_args["from_version"] = from_version
result_version["from_version"] = from_version
else:
result_version["from_version"] = "1"

if to_version is not None:
vm_args["to_version"] = to_version
result_version["to_version"] = to_version
else:
result_version["to_version"] = "2"

vm = VersionMigrator(**vm_args)
assert str(vm.from_version) == result_version["from_version"]
assert str(vm.to_version) == result_version["to_version"]

def test_init_bad_versions(self):
# Test some invalid version arguments

# Migrator only increases version
with pytest.raises(ValueError, match="Can only migrate from a lower version"):
VersionMigrator(from_version="2", to_version="1")

# Nothing to do if the versions are the same
with pytest.raises(ValueError, match="Can only migrate from a lower version"):
VersionMigrator(from_version="2", to_version="2")

# An invalid version should raise an error
with pytest.raises(InvalidVersion, match="notta parsable version"):
VersionMigrator(from_version="notta parsable version")
mwcraig marked this conversation as resolved.
Show resolved Hide resolved

# An unknown from or to version should raise an error
with pytest.raises(ValueError, match="Unknown version"):
VersionMigrator(from_version="3")

with pytest.raises(ValueError, match="Unknown version"):
VersionMigrator(to_version="3")

# The parameter two_filters is passed to the fixture stellphotv1_photometry_data
# and is used to determine whether the test data contains data with one or
# two filters.
@pytest.mark.parametrize("two_filters", [True, False])
@pytest.mark.parametrize("exposure_unit", [None, u.second])
def test_migrate_v1_v2_mag_names_with_band(
self, stellphotv1_photometry_data, exposure_unit
):
# Test migrating from v1 to v2
camera = Camera(**TEST_CAMERA_VALUES)
observatory = Observatory(**TEST_OBSERVATORY_SETTINGS)
vm = VersionMigrator(
from_version="1", to_version="2", camera=camera, observatory=observatory
)
if exposure_unit is not None:
stellphotv1_photometry_data["exposure"] *= exposure_unit
sp2 = vm.migrate(stellphotv1_photometry_data)

# Make sure the instrumental magnitudes have ended up in the right place
for passband in set(sp2["passband"]):
in_band_2 = sp2["passband"] == passband
in_band_1 = stellphotv1_photometry_data["filter"] == passband
assert all(
sp2["mag_inst"][in_band_2]
== stellphotv1_photometry_data[f"mag_inst_{passband}"][in_band_1]
)

# Make sure we can initialize a PhotometryData object from the migrated data
# To ensure we go through the full init, change sp2 to a plain Table.
input_data = Table(sp2)
# Remove the autogenerated column
del input_data["night"]

# Make the new PhotometryData object
sp2_pd = PhotometryData(input_data=input_data)
# Check that the data is the same
assert all(sp2_pd == sp2)
# Check that we really got a new object
assert sp2_pd is not sp2
# Check the metadata is the same
assert sp2_pd.meta == sp2.meta
174 changes: 174 additions & 0 deletions stellarphot/utils/version_migrator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import numpy as np
from astropy import units as u
from astropy.time import Time
from packaging.version import Version

from stellarphot import PhotometryData
from stellarphot.settings import Camera, Observatory, PassbandMap


class VersionMigrator:
"""
Migrate data from one stellarphot major version to another.

Parameters
----------
from_version : str
The version of the data to migrate from.

to_version : str
The version of the data to migrate to.

camera : Camera
The camera settings for the data.

observatory : Observatory
The observatory settings for the data.

passband_map : PassbandMap
The passband map for the data.
"""

known_versions = ("1", "2")

def __init__(
self,
from_version: str = "1",
to_version: str = "2",
camera: Camera = None,
observatory: Observatory = None,
passband_map: PassbandMap = None,
):
self._from_version = Version(from_version)
self._to_version = Version(to_version)
if str(self.from_version.major) not in self.known_versions:
raise ValueError(
f"Unknown version: {self.from_version=}. "
f"Valid versions are: {self.known_versions}"
)

if str(self.to_version.major) not in self.known_versions:
raise ValueError(
f"Unknown version: {self.to_version=}. "
f"Valid versions are: {self.known_versions}"
)

if self.from_version >= self.to_version:
raise ValueError(
"Can only migrate from a lower version to a higher version"
)

self.camera = camera
self.observatory = observatory
self.passband_map = passband_map

@property
def from_version(self):
"""
The stellarphot major version to migrate from.
"""
return self._from_version

@property
def to_version(self):
"""
The stellarphot major version to migrate to.
"""
return self._to_version

def migrate(self, data):
"""
Migrate data from one version to another.

Parameters
----------
data : `astropy.table.Table` or `stellarphot.PhotometryData`
The data to migrate.
"""
if self.from_version.major == 1 and self.to_version.major == 2:
return self._migrate_v1_v2(data)
else: # pragma: no cover
# do not cover because right now no other version combo possible
raise ValueError(
f"Migration from version {self.from_version} to version "
f"{self.to_version} is not supported."
)

def _migrate_v1_v2(self, data):
"""
Migrate data from version 1 to version 2.

Parameters
----------
data : `astropy.table.Table` or `stellarphot.PhotometryData`
The data to migrate.
"""
new_data = data.copy()

unitify = {
"fwhm_x": u.pixel,
"fwhm_y": u.pixel,
"width": u.pixel,
"aperture": u.pixel,
"annulus_inner": u.pixel,
"annulus_outer": u.pixel,
"aperture_area": u.pixel,
"annulus_area": u.pixel,
"noise-aij": u.electron,
"annulus_sum": u.adu,
"aperture_sum": u.adu,
"aperture_net_flux": u.adu,
"noise": u.adu,
"sky_per_pix_avg": u.adu / u.pixel,
"sky_per_pix_med": u.adu / u.pixel,
"sky_per_pix_std": u.adu / u.pixel,
"RA": u.deg,
"Dec": u.deg,
"xcenter": u.pixel,
"ycenter": u.pixel,
"exposure": u.second,
}
for un in unitify:
# Note that this deliberately discards any unit that might have been
# present in the original data. This is a stellarphot-specific migration
# tool, not something more general, and the choices above are the correct
# units for the data.
new_data[un] = new_data[un].value * unitify[un]
Comment on lines +132 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we potentially have unit conversion issues here? Or were these units also used in v1 but just not attached to the columns? I don't remember.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old almost never included units, but it depended on whether the photometry was saved as CSV (no units saved) or FITS (units sometimes present for some things). This is maybe a little risky, but for feder data the image unit was always ADU, times, in seconds, etc.

No one else ever used v1 as far as I know, so this should be ok, I think


new_data["date-obs"] = Time(new_data["date-obs"], scale="utc")

v1_to_v2_col_renames = {
"aperture_net_flux": "aperture_net_cnts",
"BJD": "bjd",
"noise": "noise_cnts",
"noise-aij": "noise_electrons",
"filter": "passband",
"RA": "ra",
"Dec": "dec",
}

# THe new night calculation is better, so drop the old one
mwcraig marked this conversation as resolved.
Show resolved Hide resolved
del new_data["night"]

# Handle changes in instrumental magnitude column names
mag_columns = [col for col in new_data.colnames if col.startswith("mag_inst")]
mag_cols_have_filter_names = any(
mag.split("_")[-1] in new_data["filter"] for mag in mag_columns
)

if mag_cols_have_filter_names:
new_mag_col_data = np.zeros_like(new_data["date-obs"])
for col in mag_columns:
passband = col.split("_")[-1]
this_passband = new_data["filter"] == passband
new_mag_col_data[this_passband] = new_data[col][this_passband]
new_data["mag_inst"] = new_mag_col_data
new_data.remove_columns(mag_columns)

return PhotometryData(
input_data=new_data,
camera=self.camera,
observatory=self.observatory,
passband_map=self.passband_map,
colname_map=v1_to_v2_col_renames,
)