utils.databases.lightcurves
Getting light curves data.
1""" 2Getting light curves data. 3""" 4 5import lightkurve 6from astropy.table import Table 7import pandas 8from pandera import pandas as pandera 9import numpy 10import pathlib 11import re 12from packaging.version import Version 13 14from typing import Optional, Dict, List, Pattern, Literal 15 16from ..files import file as fl 17from ..logs.log import logger 18 19# apparently, one cannot set long/short threshold, 20# hence this dictionary 21# 22# there are actually more authors available, 23# but we are only interested in these 24# 25authors: Dict[str, Dict] = { 26 "Kepler": 27 { 28 "mission": "Kepler", 29 "cadence": 30 { 31 "long": [1800], 32 "short": [60] 33 } 34 }, 35 "K2": 36 { 37 "mission": "K2", 38 "cadence": 39 { 40 "long": [1800], 41 "short": [60] 42 } 43 }, 44 "SPOC": 45 { 46 "mission": "TESS", 47 "cadence": 48 { 49 "long": [600], 50 "short": [120], 51 "fast": [20] 52 } 53 }, 54 "TESS-SPOC": 55 { 56 "mission": "TESS", 57 "cadence": 58 { 59 "long": [] # any cadence is long 60 } 61 } 62} 63""" 64Dictionary of authors, their cadence values and mapping to missions. 65""" 66 67missionSectorRegExes: Dict[str, Pattern] = { 68 "Kepler": re.compile( 69 r"^Kepler\s\w+\s(\d+)$" # Kepler Quarter 15 70 ), 71 "K2": re.compile( 72 r"^K2\s\w+\s(\d+)$" # K2 Campaign 12 73 ), 74 "TESS": re.compile( 75 r"^TESS\s\w+\s(\d+)$" # TESS Sector 40 76 ) 77} 78""" 79Dictionary of regular expressions for extracting sectors. 80""" 81 82lightCurveFluxTableSchema = pandera.DataFrameSchema( 83 { 84 "time": pandera.Column(numpy.float64), 85 "flux": pandera.Column(numpy.float32, nullable=True), 86 "fluxError": pandera.Column(numpy.float32, nullable=True) 87 }, 88 index=pandera.Index(int, unique=True), 89 strict=True, # only specified columns are allowed 90 coerce=False # do not cast other types to the specified one 91) 92""" 93Table schema for light curve fluxes. 
94""" 95 96 97def getLightCurveStats( 98 starName: str, 99 detailed: bool = True 100) -> Dict[str, Dict]: 101 """ 102 Gather statistics about available cadence values for a given star. 103 104 If `detailed` is set to `False`, then function will skip collecting 105 cadence values count by sectors, so resulting statistics will only 106 contain total count of values. 107 108 Example: 109 110 ``` py 111 from phab.utils.databases import lightcurves 112 113 stats = lightcurves.getLightCurveStats("Kepler-114") 114 if not stats: 115 print("Didn't find any results for this star") 116 else: 117 missionName = "Kepler" 118 cadenceType = "long" 119 sectors = stats.get( 120 missionName, 121 {} 122 ).get(cadenceType) 123 if sectors is None: 124 print( 125 " ".join(( 126 "There doesn't seem to be any sectors", 127 f"with [{cadenceType}] cadence by [{missionName}]" 128 )) 129 ) 130 else: 131 totalProperty = "total" 132 sectorsCount = sectors.get(totalProperty) 133 if sectorsCount is None: 134 print( 135 " ".join(( 136 f"For some reason, the [{totalProperty}] property", 137 f"is missing from the [{cadenceType}] cadence", 138 f"collection by [{missionName}]" 139 )) 140 ) 141 else: 142 print( 143 " ".join(( 144 f"Total amount of sectors with [{cadenceType}]", 145 f"cadence by [{missionName}]: {sectorsCount}", 146 )) 147 ) 148 bySectors = sectors.get("by-sectors") 149 if bySectors is None: 150 print( 151 " ".join(( 152 "For some reason, the [total] property is missing", 153 f"from the [{cadenceType}] cadence collection", 154 f"by [{missionName}]" 155 )) 156 ) 157 else: 158 for s in bySectors: 159 print(f"- {s}: {bySectors[s]}") 160 ``` 161 """ 162 stats: Dict[str, Dict] = {} 163 164 lghtcrvs = lightkurve.search_lightcurve( 165 starName, 166 author=tuple(authors.keys()) 167 ) 168 if len(lghtcrvs) != 0: 169 tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[ 170 ["author", "exptime", "mission"] 171 ] 172 logger.debug(tbl) 173 174 author: str # for mypy, but even then it is not happy with 
something else 175 for author, group in (tbl.groupby("author")): # type:ignore[assignment] # ya hz 176 if author not in authors: 177 raise ValueError(f"Unknown author: {author}") 178 mission = authors[author]["mission"] 179 if not stats.get(mission): 180 stats[mission] = {} 181 for cadence in ["long", "short", "fast"]: 182 if cadence in authors[author]["cadence"]: 183 stats[mission][cadence] = {} 184 cadenceValues: List[int] = ( 185 authors[author]["cadence"][cadence] 186 ) 187 cadences: pandas.DataFrame 188 if len(cadenceValues) > 0: # take only specified values 189 # perhaps both of these should be normalized to int 190 cadences = group.query("exptime == @cadenceValues") 191 else: # any value is good 192 cadences = group 193 194 # total count 195 stats[mission][cadence]["total"] = len(cadences) 196 197 if detailed: 198 # count by sectors 199 stats[mission][cadence]["by-sectors"] = {} 200 for m in cadences["mission"]: 201 # logger.debug(cadences.query("mission == @m")[ 202 # "exptime" 203 # ].values) 204 sectorMatch = re.search( 205 missionSectorRegExes[mission], 206 m 207 ) 208 if not sectorMatch: 209 raise ValueError( 210 " ".join(( 211 "Couldn't extract sector from", 212 f"this mission value: {m}" 213 )) 214 ) 215 sector = sectorMatch.group(1) 216 if not stats[mission][cadence]["by-sectors"].get( 217 sector 218 ): # this sector hasn't been added yet 219 stats[mission][cadence]["by-sectors"][ 220 sector 221 ] = {} 222 # save the cadence/exptime too (assuming 223 # that it is the same for every sector entry) 224 stats[mission][cadence]["by-sectors"][sector][ 225 "exptime" 226 ] = cadences.query("mission == @m")[ 227 "exptime" 228 ].values[0] # there must be a better way 229 try: 230 stats[mission][cadence][ 231 "by-sectors" 232 ][sector]["count"] += 1 233 except KeyError: 234 stats[mission][cadence][ 235 "by-sectors" 236 ][sector]["count"] = 1 237 return stats 238 239 240def getLightCurveIDs( 241 starName: str 242) -> Dict[str, List[str]]: 243 """ 244 Based on 
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Based on available cadence values statistics for a given star,
    get names of missions and cadences. For instance, in order to pass
    them to `altaipony.lcio.from_mast()`.

    Raises `ValueError` if `getLightCurveStats()` finds nothing
    for this star.

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    lightCurveIDs: Dict[str, List[str]] = {}

    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # the order matters, it goes from most important to least important,
    # and in fact long cadence is so not important that it is discarded
    # if there is fast or short cadence available
    cadencePriority = ["fast", "short", "long"]

    for m in stats.keys():
        lightCurveIDs[m] = []
        priorityThreshold = 0
        for cp in cadencePriority:
            # if there is already fast or short cadence in the list,
            # don't take long cadence (except for mission K2, because
            # its long cadence is what's most important even if
            # there are also fast and short ones)
            if lightCurveIDs[m] and priorityThreshold > 1 and m != "K2":
                break
            if cp in stats[m]:
                totalCnt = stats[m][cp].get("total")
                # the `total` property might be missing or 0 — in both
                # cases there is nothing to take from that cadence
                # (was `totalCnt and totalCnt != 0`, which is redundant:
                # truthiness already excludes both None and 0)
                if totalCnt:
                    lightCurveIDs[m].append(cp)
            priorityThreshold += 1

    return lightCurveIDs
def fitsToPandas(
    fitsFilePath: str,
    fitsType: Optional[Literal["tess", "kepler"]] = None,
    qualityBitmask: Literal["none", "default", "hard", "hardest"] = "default",
    dropNanTimes: bool = True,
    convertTimesToSeconds: bool = False
) -> pandas.DataFrame:
    """
    Open a generic light curves [FITS](https://en.wikipedia.org/wiki/FITS) file
    and create a Pandas table from it. Only the fluxes, their times
    and errors columns are taken.

    Handles the big/little endians problem when converting from FITS to Pandas.

    Raises `ValueError` if `fitsFilePath` does not point to an existing file.

    Example:

    ``` py
    from phab.utils.databases import lightcurves

    pnd = lightcurves.fitsToPandas(
        "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
        fitsType="tess",
        qualityBitmask="default",
        dropNanTimes=True,
        convertTimesToSeconds=True
    )

    #print(pnd)
    ```
    """
    fitsFile: Optional[pathlib.Path] = fl.fileExists(fitsFilePath)
    if fitsFile is None:
        raise ValueError(
            f"Provided path to [{fitsFilePath}] seems to be wrong"
        )
    lc = Table.read(fitsFile)

    # exclude values which do not satisfy the required quality
    if fitsType is not None:
        msk = None
        if fitsType == "tess":
            msk = lightkurve.utils.TessQualityFlags.create_quality_mask(
                quality_array=lc["QUALITY"],
                bitmask=qualityBitmask
            )
        elif fitsType == "kepler":
            msk = lightkurve.utils.KeplerQualityFlags.create_quality_mask(
                quality_array=lc["QUALITY"],
                bitmask=qualityBitmask
            )
        else:
            logger.warning(
                " ".join((
                    "Unknown FITS type, don't know",
                    "which quality mask to use"
                ))
            )
        # previously the mask was applied unconditionally, so an unknown
        # `fitsType` value would crash on `lc[None]` right after
        # the warning — only apply the mask when one was actually built
        if msk is not None:
            lc = lc[msk]

    narr = numpy.array(lc)
    # FITS stores data in big-endian, but pandas works with little-endian,
    # so the byte order needs to be swapped
    # https://stackoverflow.com/a/30284033/1688203
    if Version(numpy.__version__) > Version("1.26.4"):
        # if that doesn't work, then you might need to downgrade to 1.26.4
        narr = narr.view(narr.dtype.newbyteorder()).byteswap()
    else:
        narr = narr.byteswap().newbyteorder()

    # astropy.time does not(?) support NaN
    if dropNanTimes:
        nantimes = numpy.isnan(narr["TIME"].data)
        if numpy.any(nantimes):
            logger.debug(
                " ".join((
                    f"{numpy.sum(nantimes)} rows were excluded,",
                    "because their time values are NaN"
                ))
            )
            narr = narr[~nantimes]

    # apparently, one cannot just take columns from `lc`/`narr` directly,
    # hence this intermediate table
    pndraw = pandas.DataFrame(narr)
    logger.debug(f"Light curve table columns: {pndraw.columns}")

    flux = pandas.DataFrame(
        columns=[
            "time",
            "flux",
            "fluxError"
        ]
    )
    flux["time"] = pndraw["TIME"]
    flux["flux"] = pndraw["PDCSAP_FLUX"]
    flux["fluxError"] = pndraw["PDCSAP_FLUX_ERR"]

    if convertTimesToSeconds:
        flux["time"] = flux["time"] * 24 * 60 * 60

    if dropNanTimes:
        lightCurveFluxTableSchema.validate(flux)
    else:
        # when NaN times are kept, the `time` column must be nullable too
        lightCurveFluxTableSchema.update_column(
            "time",
            dtype=numpy.float64,
            nullable=True
        ).validate(flux)

    return flux
def lightCurveTessToPandas(
    lightKurve: lightkurve.lightcurve.TessLightCurve,
    convertTimesToSeconds: bool = False
) -> pandas.DataFrame:
    """
    Convert a TESS light curve object to a Pandas table.

    In general, this does almost the same thing as
    `utils.databases.lightcurves.fitsToPandas()`, but here it uses
    a TESS-specific reading function, and also there is no need to drop
    NaN times "manually" (*and fiddle with endians?*).

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    import lightkurve

    downloadLC: bool = False
    lc = None
    if downloadLC:
        search_result = lightkurve.search_lightcurve(
            "Karmn J07446+035",
            author="SPOC",
            cadence="short"
        )
        lc = search_result[0].download(
            quality_bitmask="default"
        )
    else:
        lc = lightkurve.TessLightCurve.read(
            "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
            quality_bitmask="default"
        )

    pnd = lightcurves.lightCurveTessToPandas(lc, convertTimesToSeconds=True)

    #print(pnd)
    ```
    """
    src = lightKurve.to_pandas()
    logger.debug(f"Light curve table columns: {src.columns}")

    flux = pandas.DataFrame(
        columns=[
            "time",
            "flux",
            "fluxError"
        ]
    )

    # times come from the table index, fluxes from the PDCSAP columns
    flux["time"] = src.index
    flux["flux"] = src["pdcsap_flux"].values
    flux["fluxError"] = src["pdcsap_flux_err"].values

    if convertTimesToSeconds:
        # per the parameter's contract: day fractions -> seconds
        flux["time"] = flux["time"] * 24 * 60 * 60

    lightCurveFluxTableSchema.validate(flux)

    return flux
Dictionary of regular expressions for extracting sectors.
Table schema for light curve fluxes.
98def getLightCurveStats( 99 starName: str, 100 detailed: bool = True 101) -> Dict[str, Dict]: 102 """ 103 Gather statistics about available cadence values for a given star. 104 105 If `detailed` is set to `False`, then function will skip collecting 106 cadence values count by sectors, so resulting statistics will only 107 contain total count of values. 108 109 Example: 110 111 ``` py 112 from phab.utils.databases import lightcurves 113 114 stats = lightcurves.getLightCurveStats("Kepler-114") 115 if not stats: 116 print("Didn't find any results for this star") 117 else: 118 missionName = "Kepler" 119 cadenceType = "long" 120 sectors = stats.get( 121 missionName, 122 {} 123 ).get(cadenceType) 124 if sectors is None: 125 print( 126 " ".join(( 127 "There doesn't seem to be any sectors", 128 f"with [{cadenceType}] cadence by [{missionName}]" 129 )) 130 ) 131 else: 132 totalProperty = "total" 133 sectorsCount = sectors.get(totalProperty) 134 if sectorsCount is None: 135 print( 136 " ".join(( 137 f"For some reason, the [{totalProperty}] property", 138 f"is missing from the [{cadenceType}] cadence", 139 f"collection by [{missionName}]" 140 )) 141 ) 142 else: 143 print( 144 " ".join(( 145 f"Total amount of sectors with [{cadenceType}]", 146 f"cadence by [{missionName}]: {sectorsCount}", 147 )) 148 ) 149 bySectors = sectors.get("by-sectors") 150 if bySectors is None: 151 print( 152 " ".join(( 153 "For some reason, the [total] property is missing", 154 f"from the [{cadenceType}] cadence collection", 155 f"by [{missionName}]" 156 )) 157 ) 158 else: 159 for s in bySectors: 160 print(f"- {s}: {bySectors[s]}") 161 ``` 162 """ 163 stats: Dict[str, Dict] = {} 164 165 lghtcrvs = lightkurve.search_lightcurve( 166 starName, 167 author=tuple(authors.keys()) 168 ) 169 if len(lghtcrvs) != 0: 170 tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[ 171 ["author", "exptime", "mission"] 172 ] 173 logger.debug(tbl) 174 175 author: str # for mypy, but even then it is not happy with something 
else 176 for author, group in (tbl.groupby("author")): # type:ignore[assignment] # ya hz 177 if author not in authors: 178 raise ValueError(f"Unknown author: {author}") 179 mission = authors[author]["mission"] 180 if not stats.get(mission): 181 stats[mission] = {} 182 for cadence in ["long", "short", "fast"]: 183 if cadence in authors[author]["cadence"]: 184 stats[mission][cadence] = {} 185 cadenceValues: List[int] = ( 186 authors[author]["cadence"][cadence] 187 ) 188 cadences: pandas.DataFrame 189 if len(cadenceValues) > 0: # take only specified values 190 # perhaps both of these should be normalized to int 191 cadences = group.query("exptime == @cadenceValues") 192 else: # any value is good 193 cadences = group 194 195 # total count 196 stats[mission][cadence]["total"] = len(cadences) 197 198 if detailed: 199 # count by sectors 200 stats[mission][cadence]["by-sectors"] = {} 201 for m in cadences["mission"]: 202 # logger.debug(cadences.query("mission == @m")[ 203 # "exptime" 204 # ].values) 205 sectorMatch = re.search( 206 missionSectorRegExes[mission], 207 m 208 ) 209 if not sectorMatch: 210 raise ValueError( 211 " ".join(( 212 "Couldn't extract sector from", 213 f"this mission value: {m}" 214 )) 215 ) 216 sector = sectorMatch.group(1) 217 if not stats[mission][cadence]["by-sectors"].get( 218 sector 219 ): # this sector hasn't been added yet 220 stats[mission][cadence]["by-sectors"][ 221 sector 222 ] = {} 223 # save the cadence/exptime too (assuming 224 # that it is the same for every sector entry) 225 stats[mission][cadence]["by-sectors"][sector][ 226 "exptime" 227 ] = cadences.query("mission == @m")[ 228 "exptime" 229 ].values[0] # there must be a better way 230 try: 231 stats[mission][cadence][ 232 "by-sectors" 233 ][sector]["count"] += 1 234 except KeyError: 235 stats[mission][cadence][ 236 "by-sectors" 237 ][sector]["count"] = 1 238 return stats
Gather statistics about available cadence values for a given star.
If detailed
is set to False
, then function will skip collecting
cadence values count by sectors, so resulting statistics will only
contain total count of values.
Example:
from phab.utils.databases import lightcurves
stats = lightcurves.getLightCurveStats("Kepler-114")
if not stats:
print("Didn't find any results for this star")
else:
missionName = "Kepler"
cadenceType = "long"
sectors = stats.get(
missionName,
{}
).get(cadenceType)
if sectors is None:
print(
" ".join((
"There doesn't seem to be any sectors",
f"with [{cadenceType}] cadence by [{missionName}]"
))
)
else:
totalProperty = "total"
sectorsCount = sectors.get(totalProperty)
if sectorsCount is None:
print(
" ".join((
f"For some reason, the [{totalProperty}] property",
f"is missing from the [{cadenceType}] cadence",
f"collection by [{missionName}]"
))
)
else:
print(
" ".join((
f"Total amount of sectors with [{cadenceType}]",
f"cadence by [{missionName}]: {sectorsCount}",
))
)
bySectors = sectors.get("by-sectors")
if bySectors is None:
print(
" ".join((
"For some reason, the [total] property is missing",
f"from the [{cadenceType}] cadence collection",
f"by [{missionName}]"
))
)
else:
for s in bySectors:
print(f"- {s}: {bySectors[s]}")
241def getLightCurveIDs( 242 starName: str 243) -> Dict[str, List[str]]: 244 """ 245 Based on available cadence values statistics for a given star, 246 get names of missions and cadences. For instance, in order to pass 247 them to `altaipony.lcio.from_mast()`. 248 249 Example: 250 251 ``` py 252 from phab.utils.databases import lightcurves 253 from altaipony.lcio import from_mast 254 255 starName = "LTT 1445 A" 256 lightCurveIDs = {} 257 258 try: 259 lightCurveIDs = lightcurves.getLightCurveIDs(starName) 260 except ValueError as ex: 261 print(f"Failed to get light curves missons and cadences. {ex}") 262 raise 263 if not lightCurveIDs: 264 raise ValueError("Didn't find any results for this star") 265 #print(lightCurveIDs) 266 267 for m in lightCurveIDs.keys(): 268 #print(f"Mission: {m}") 269 for c in lightCurveIDs[m]: 270 #print(f"- {c}") 271 flc = from_mast( 272 starName, 273 mode="LC", 274 cadence=c, 275 mission=m 276 ) 277 #print(flc) 278 ``` 279 """ 280 lightCurveIDs: Dict[str, List[str]] = {} 281 282 stats: Dict[str, Dict] = getLightCurveStats( 283 starName, 284 detailed=False 285 ) 286 if not stats: 287 raise ValueError("Didn't find any results for this star") 288 289 # the order matters, it goes from most important to least important, 290 # and in fact long cadence is so not important that it is discarded 291 # if there is fast or short cadence available 292 cadencePriority = ["fast", "short", "long"] 293 294 for m in stats.keys(): 295 lightCurveIDs[m] = [] 296 priorityThreshold = 0 297 for cp in cadencePriority: 298 # if there is already fast or short cadence in the list, 299 # don't take long cadence (except for mission K2, because 300 # its long cadence is what's most important even if 301 # there are also fast and short ones) 302 if any(lightCurveIDs[m]) and priorityThreshold > 1 and m != "K2": 303 break 304 if cp in stats[m]: 305 # print(f"Count [{cp}]: {stats[m][cp]['total']}") 306 totalCnt = stats[m][cp].get("total") 307 if totalCnt and totalCnt != 0: 
308 lightCurveIDs[m].append(cp) 309 # else: 310 # print( 311 # " ".join(( 312 # f"[WARNING] The [{cp}] cadence count", 313 # f"in [{m}] is 0 (or missing)" 314 # )) 315 # ) 316 priorityThreshold += 1 317 318 return lightCurveIDs
Based on available cadence values statistics for a given star,
get names of missions and cadences. For instance, in order to pass
them to altaipony.lcio.from_mast()
.
Example:
from phab.utils.databases import lightcurves
from altaipony.lcio import from_mast
starName = "LTT 1445 A"
lightCurveIDs = {}
try:
lightCurveIDs = lightcurves.getLightCurveIDs(starName)
except ValueError as ex:
print(f"Failed to get light curves missons and cadences. {ex}")
raise
if not lightCurveIDs:
raise ValueError("Didn't find any results for this star")
#print(lightCurveIDs)
for m in lightCurveIDs.keys():
#print(f"Mission: {m}")
for c in lightCurveIDs[m]:
#print(f"- {c}")
flc = from_mast(
starName,
mode="LC",
cadence=c,
mission=m
)
#print(flc)
321def fitsToPandas( 322 fitsFilePath: str, 323 fitsType: Optional[Literal["tess", "kepler"]] = None, 324 qualityBitmask: Literal["none", "default", "hard", "hardest"] = "default", 325 dropNanTimes: bool = True, 326 convertTimesToSeconds: bool = False 327) -> pandas.DataFrame: 328 """ 329 Open a generic light curves [FITS](https://en.wikipedia.org/wiki/FITS) file 330 and create a Pandas table from it. Only the fluxes, their times 331 and errors columns are taken. 332 333 Handles the big/little endians problem when converting from FITS to Pandas. 334 335 Example: 336 337 ``` py 338 from phab.utils.databases import lightcurves 339 340 pnd = lightcurves.fitsToPandas( 341 "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits", 342 fitsType="tess", 343 qualityBitmask="default", 344 dropNanTimes=True, 345 convertTimesToSeconds=True 346 ) 347 348 #print(pnd) 349 ``` 350 """ 351 lc = None 352 fitsFile: Optional[pathlib.Path] = fl.fileExists(fitsFilePath) 353 if fitsFile is None: 354 raise ValueError( 355 f"Provided path to [{fitsFilePath}] seems to be wrong" 356 ) 357 else: 358 lc = Table.read(fitsFile) 359 360 # exclude values which do not satisfy the required quality 361 if fitsType is not None: 362 msk = None 363 if fitsType == "tess": 364 msk = lightkurve.utils.TessQualityFlags.create_quality_mask( 365 quality_array=lc["QUALITY"], 366 bitmask=qualityBitmask 367 ) 368 elif fitsType == "kepler": 369 msk = lightkurve.utils.KeplerQualityFlags.create_quality_mask( 370 quality_array=lc["QUALITY"], 371 bitmask=qualityBitmask 372 ) 373 else: 374 print( 375 " ".join(( 376 "[WARNING] Unknown FITS type, don't know", 377 "which quality mask to use" 378 )) 379 ) 380 lc = lc[msk] 381 382 narr = numpy.array(lc) 383 # FITS stores data in big-endian, but pandas works with little-endian, 384 # so the byte order needs to be swapped 385 # https://stackoverflow.com/a/30284033/1688203 386 if Version(numpy.__version__) > Version("1.26.4"): 387 # if that doesn't work, then you might 
need to downgrade to 1.26.4 388 narr = narr.view(narr.dtype.newbyteorder()).byteswap() 389 else: 390 narr = narr.byteswap().newbyteorder() 391 392 # astropy.time does not(?) support NaN 393 if dropNanTimes: 394 nantimes = numpy.isnan(narr["TIME"].data) 395 if numpy.any(nantimes): 396 print( 397 " ".join(( 398 f"[DEBUG] {numpy.sum(nantimes)} rows were excluded,", 399 "because their time values are NaN" 400 )) 401 ) 402 narr = narr[~nantimes] 403 404 # apparently, one cannot just take columns from `lc`/`narr` directly, 405 # hence this intermediate table 406 pndraw = pandas.DataFrame(narr) 407 logger.debug(f"Light curve table columns: {pndraw.columns}") 408 409 flux = pandas.DataFrame( 410 columns=[ 411 "time", 412 "flux", 413 "fluxError" 414 ] 415 ) 416 flux["time"] = pndraw["TIME"] 417 flux["flux"] = pndraw["PDCSAP_FLUX"] 418 flux["fluxError"] = pndraw["PDCSAP_FLUX_ERR"] 419 420 # in case excluding NaN times right after `Table.read()` is less efficient 421 # if dropNanTimes: 422 # flux = flux.dropna(subset=["time"]) 423 424 if convertTimesToSeconds: 425 flux["time"] = flux["time"] * 24 * 60 * 60 426 427 if dropNanTimes: 428 lightCurveFluxTableSchema.validate(flux) 429 else: 430 lightCurveFluxTableSchema.update_column( 431 "time", 432 dtype=numpy.float64, 433 nullable=True 434 ).validate(flux) 435 436 return flux
Open a generic light curves FITS file and create a Pandas table from it. Only the fluxes, their times and errors columns are taken.
Handles the big/little endians problem when converting from FITS to Pandas.
Example:
from phab.utils.databases import lightcurves
pnd = lightcurves.fitsToPandas(
"./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
fitsType="tess",
qualityBitmask="default",
dropNanTimes=True,
convertTimesToSeconds=True
)
#print(pnd)
439def lightCurveTessToPandas( 440 lightKurve: lightkurve.lightcurve.TessLightCurve, 441 convertTimesToSeconds: bool = False 442) -> pandas.DataFrame: 443 """ 444 Converting a TESS light curve object to a Pandas table. In general, 445 it does almost the same thing as 446 `utils.databases.lightcurves.fitsToPandas()`, 447 but here there it uses a TESS-specific reading function, and also 448 there is no need to drop NaN times "manually" (*and fiddle with endians?*). 449 450 Example: 451 452 ``` py 453 from phab.utils.databases import lightcurves 454 import lightkurve 455 456 downloadLC: bool = False 457 lc = None 458 if downloadLC: 459 search_result = lightkurve.search_lightcurve( 460 "Karmn J07446+035", 461 author="SPOC", 462 cadence="short" 463 ) 464 lc = search_result[0].download( 465 quality_bitmask="default" 466 ) 467 else: 468 lc = lightkurve.TessLightCurve.read( 469 "./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits", 470 quality_bitmask="default" 471 ) 472 473 pnd = lightcurves.lightCurveTessToPandas(lc, convertTimesToSeconds=True) 474 475 #print(pnd) 476 ``` 477 """ 478 pndraw = lightKurve.to_pandas() 479 logger.debug(f"Light curve table columns: {pndraw.columns}") 480 481 flux = pandas.DataFrame( 482 columns=[ 483 "time", 484 "flux", 485 "fluxError" 486 ] 487 ) 488 489 flux["time"] = pndraw.index 490 flux["flux"] = pndraw["pdcsap_flux"].values 491 flux["fluxError"] = pndraw["pdcsap_flux_err"].values 492 493 if convertTimesToSeconds: 494 flux["time"] = flux["time"] * 24 * 60 * 60 495 496 lightCurveFluxTableSchema.validate(flux) 497 498 return flux
Converting a TESS light curve object to a Pandas table. In general,
it does almost the same thing as
utils.databases.lightcurves.fitsToPandas()
,
but here it uses a TESS-specific reading function, and also
there is no need to drop NaN times "manually" (and fiddle with endians?).
Example:
from phab.utils.databases import lightcurves
import lightkurve
downloadLC: bool = False
lc = None
if downloadLC:
search_result = lightkurve.search_lightcurve(
"Karmn J07446+035",
author="SPOC",
cadence="short"
)
lc = search_result[0].download(
quality_bitmask="default"
)
else:
lc = lightkurve.TessLightCurve.read(
"./data/tess2019006130736-s0007-0000000266744225-0131-s_lc.fits",
quality_bitmask="default"
)
pnd = lightcurves.lightCurveTessToPandas(lc, convertTimesToSeconds=True)
#print(pnd)