Edit on GitHub

utils.databases.lightcurves

Getting light curves data.

  1"""
  2Getting light curves data.
  3"""
  4
  5import lightkurve
  6import pandas
  7import re
  8
  9from typing import Optional, Dict, List, Pattern
 10
 11from ..logs.log import logger
 12
 13# apparently, one cannot set long/short threshold,
 14# hence this dictionary
 15#
 16# there are actually more authors available,
 17# but we are only interested in these
 18#
 19authors: Dict[str, Dict] = {
 20    "Kepler":
 21    {
 22        "mission": "Kepler",
 23        "cadence":
 24        {
 25            "long": [1800],
 26            "short": [60]
 27        }
 28    },
 29    "K2":
 30    {
 31        "mission": "K2",
 32        "cadence":
 33        {
 34            "long": [1800],
 35            "short": [60]
 36        }
 37    },
 38    "SPOC":
 39    {
 40        "mission": "TESS",
 41        "cadence":
 42        {
 43            "long": [600],
 44            "short": [120],
 45            "fast": [20]
 46        }
 47    },
 48    "TESS-SPOC":
 49    {
 50        "mission": "TESS",
 51        "cadence":
 52        {
 53            "long": []  # any cadence is long
 54        }
 55    }
 56}
 57"""
 58Dictionary of authors, their cadence values and mapping to missions.
 59"""
 60
 61missionSectorRegExes: Dict[str, Pattern] = {
 62    "Kepler": re.compile(
 63        r"^Kepler\s\w+\s(\d+)$"  # Kepler Quarter 15
 64    ),
 65    "K2": re.compile(
 66        r"^K2\s\w+\s(\d+)$"  # K2 Campaign 12
 67    ),
 68    "TESS": re.compile(
 69        r"^TESS\s\w+\s(\d+)$"  # TESS Sector 40
 70    )
 71}
 72"""
 73Dictionary of regular expressions for extracting sectors.
 74"""
 75
 76
 77def getLightCurveStats(
 78    starName: str,
 79    detailed: bool = True
 80) -> Dict[str, Dict]:
 81    """
 82    Gather statistics about available cadence values for a given star.
 83
 84    If `detailed` is set to `False`, then function will skip collecting
 85    cadence values count by sectors, so resulting statistics will only
 86    contain total count of values.
 87
 88    Example:
 89
 90    ``` py
 91    from phab.utils.databases import lightcurves
 92
 93    stats = lightcurves.getLightCurveStats("Kepler-114")
 94    if not stats:
 95        print("Didn't find any results for this star")
 96    else:
 97        missionName = "Kepler"
 98        cadenceType = "long"
 99        sectors = stats.get(
100            missionName,
101            {}
102        ).get(cadenceType)
103        if sectors is None:
104            print(
105                " ".join((
106                    "There doesn't seem to be any sectors",
107                    f"with [{cadenceType}] cadence by [{missionName}]"
108                ))
109            )
110        else:
111            totalProperty = "total"
112            sectorsCount = sectors.get(totalProperty)
113            if sectorsCount is None:
114                print(
115                    " ".join((
116                        f"For some reason, the [{totalProperty}] property",
117                        f"is missing from the [{cadenceType}] cadence",
118                        f"collection by [{missionName}]"
119                    ))
120                )
121            else:
122                print(
123                    " ".join((
124                        f"Total amount of sectors with [{cadenceType}]",
125                        f"cadence by [{missionName}]: {sectorsCount}",
126                    ))
127                )
128                bySectors = sectors.get("by-sectors")
129                if bySectors is None:
130                    print(
131                        " ".join((
132                            "For some reason, the [total] property is missing",
133                            f"from the [{cadenceType}] cadence collection",
134                            f"by [{missionName}]"
135                        ))
136                    )
137                else:
138                    for s in bySectors:
139                        print(f"- {s}: {bySectors[s]}")
140    ```
141    """
142    stats: Dict[str, Dict] = {}
143
144    lghtcrvs = lightkurve.search_lightcurve(
145        starName,
146        author=tuple(authors.keys())
147    )
148    if len(lghtcrvs) != 0:
149        tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
150            ["author", "exptime", "mission"]
151        ]
152        logger.debug(tbl)
153
154        author: str  # for mypy, but even then it is not happy with something else
155        for author, group in (tbl.groupby("author")):  # type:ignore[assignment] # ya hz
156            if author not in authors:
157                raise ValueError(f"Unknown author: {author}")
158            mission = authors[author]["mission"]
159            if not stats.get(mission):
160                stats[mission] = {}
161            for cadence in ["long", "short", "fast"]:
162                if cadence in authors[author]["cadence"]:
163                    stats[mission][cadence] = {}
164                    cadenceValues: List[int] = (
165                        authors[author]["cadence"][cadence]
166                    )
167                    cadences: pandas.DataFrame
168                    if len(cadenceValues) > 0:  # take only specified values
169                        # perhaps both of these should be normalized to int
170                        cadences = group.query("exptime == @cadenceValues")
171                    else:  # any value is good
172                        cadences = group
173
174                    # total count
175                    stats[mission][cadence]["total"] = len(cadences)
176
177                    if detailed:
178                        # count by sectors
179                        stats[mission][cadence]["by-sectors"] = {}
180                        for m in cadences["mission"]:
181                            # logger.debug(cadences.query("mission == @m")[
182                            #     "exptime"
183                            # ].values)
184                            sectorMatch = re.search(
185                                missionSectorRegExes[mission],
186                                m
187                            )
188                            if not sectorMatch:
189                                raise ValueError(
190                                    " ".join((
191                                        "Couldn't extract sector from",
192                                        f"this mission value: {m}"
193                                    ))
194                                )
195                            sector = sectorMatch.group(1)
196                            if not stats[mission][cadence]["by-sectors"].get(
197                                sector
198                            ):  # this sector hasn't been added yet
199                                stats[mission][cadence]["by-sectors"][
200                                    sector
201                                ] = {}
202                                # save the cadence/exptime too (assuming
203                                # that it is the same for every sector entry)
204                                stats[mission][cadence]["by-sectors"][sector][
205                                    "exptime"
206                                ] = cadences.query("mission == @m")[
207                                    "exptime"
208                                ].values[0]  # there must be a better way
209                            try:
210                                stats[mission][cadence][
211                                    "by-sectors"
212                                ][sector]["count"] += 1
213                            except KeyError:
214                                stats[mission][cadence][
215                                    "by-sectors"
216                                ][sector]["count"] = 1
217    return stats
218
219
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Pick the missions and cadence kinds that are worth requesting
    for a given star, based on the total counts reported
    by `getLightCurveStats()` (called with `detailed=False`). The result
    can be passed, for instance, to `altaipony.lcio.from_mast()`.

    For every mission the cadence kinds are considered in the order
    `fast`, `short`, `long`, and only those with a non-zero total count
    are taken. The `long` cadence is not considered once `fast`
    or `short` has been taken, except for the `K2` mission. A mission
    without any non-zero count is still present in the result,
    with an empty list.

    Raises `ValueError` if no statistics were found for the star.

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # ordered from the most important to the least important one;
    # the position in this list is what decides whether
    # a cadence may be dropped
    cadencePriority = ["fast", "short", "long"]

    lightCurveIDs: Dict[str, List[str]] = {}
    for mission, missionStats in stats.items():
        chosen: List[str] = []
        lightCurveIDs[mission] = chosen
        for rank, cadence in enumerate(cadencePriority):
            # ranks above 1 mean the long cadence: it is dropped as soon
            # as something faster has been chosen, unless the mission
            # is K2, whose long cadence matters the most regardless
            if chosen and rank > 1 and mission != "K2":
                break
            # a missing or zero total means there is nothing to request
            if cadence in missionStats and missionStats[cadence].get("total"):
                chosen.append(cadence)

    return lightCurveIDs
authors: Dict[str, Dict] = {
    "Kepler": {
        "mission": "Kepler",
        "cadence": {"long": [1800], "short": [60]},
    },
    "K2": {
        "mission": "K2",
        "cadence": {"long": [1800], "short": [60]},
    },
    "SPOC": {
        "mission": "TESS",
        "cadence": {"long": [600], "short": [120], "fast": [20]},
    },
    "TESS-SPOC": {
        "mission": "TESS",
        # an empty list means that any cadence value counts as long
        "cadence": {"long": []},
    },
}

Dictionary of authors, their cadence values and mapping to missions.

missionSectorRegExes: Dict[str, Pattern] = {
    # matches for example "Kepler Quarter 15"
    "Kepler": re.compile(r"^Kepler\s\w+\s(\d+)$"),
    # matches for example "K2 Campaign 12"
    "K2": re.compile(r"^K2\s\w+\s(\d+)$"),
    # matches for example "TESS Sector 40"
    "TESS": re.compile(r"^TESS\s\w+\s(\d+)$"),
}

Dictionary of regular expressions for extracting sectors.

def getLightCurveStats(starName: str, detailed: bool = True) -> Dict[str, Dict]:
 78def getLightCurveStats(
 79    starName: str,
 80    detailed: bool = True
 81) -> Dict[str, Dict]:
 82    """
 83    Gather statistics about available cadence values for a given star.
 84
 85    If `detailed` is set to `False`, then function will skip collecting
 86    cadence values count by sectors, so resulting statistics will only
 87    contain total count of values.
 88
 89    Example:
 90
 91    ``` py
 92    from phab.utils.databases import lightcurves
 93
 94    stats = lightcurves.getLightCurveStats("Kepler-114")
 95    if not stats:
 96        print("Didn't find any results for this star")
 97    else:
 98        missionName = "Kepler"
 99        cadenceType = "long"
100        sectors = stats.get(
101            missionName,
102            {}
103        ).get(cadenceType)
104        if sectors is None:
105            print(
106                " ".join((
107                    "There doesn't seem to be any sectors",
108                    f"with [{cadenceType}] cadence by [{missionName}]"
109                ))
110            )
111        else:
112            totalProperty = "total"
113            sectorsCount = sectors.get(totalProperty)
114            if sectorsCount is None:
115                print(
116                    " ".join((
117                        f"For some reason, the [{totalProperty}] property",
118                        f"is missing from the [{cadenceType}] cadence",
119                        f"collection by [{missionName}]"
120                    ))
121                )
122            else:
123                print(
124                    " ".join((
125                        f"Total amount of sectors with [{cadenceType}]",
126                        f"cadence by [{missionName}]: {sectorsCount}",
127                    ))
128                )
129                bySectors = sectors.get("by-sectors")
130                if bySectors is None:
131                    print(
132                        " ".join((
133                            "For some reason, the [total] property is missing",
134                            f"from the [{cadenceType}] cadence collection",
135                            f"by [{missionName}]"
136                        ))
137                    )
138                else:
139                    for s in bySectors:
140                        print(f"- {s}: {bySectors[s]}")
141    ```
142    """
143    stats: Dict[str, Dict] = {}
144
145    lghtcrvs = lightkurve.search_lightcurve(
146        starName,
147        author=tuple(authors.keys())
148    )
149    if len(lghtcrvs) != 0:
150        tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
151            ["author", "exptime", "mission"]
152        ]
153        logger.debug(tbl)
154
155        author: str  # for mypy, but even then it is not happy with something else
156        for author, group in (tbl.groupby("author")):  # type:ignore[assignment] # ya hz
157            if author not in authors:
158                raise ValueError(f"Unknown author: {author}")
159            mission = authors[author]["mission"]
160            if not stats.get(mission):
161                stats[mission] = {}
162            for cadence in ["long", "short", "fast"]:
163                if cadence in authors[author]["cadence"]:
164                    stats[mission][cadence] = {}
165                    cadenceValues: List[int] = (
166                        authors[author]["cadence"][cadence]
167                    )
168                    cadences: pandas.DataFrame
169                    if len(cadenceValues) > 0:  # take only specified values
170                        # perhaps both of these should be normalized to int
171                        cadences = group.query("exptime == @cadenceValues")
172                    else:  # any value is good
173                        cadences = group
174
175                    # total count
176                    stats[mission][cadence]["total"] = len(cadences)
177
178                    if detailed:
179                        # count by sectors
180                        stats[mission][cadence]["by-sectors"] = {}
181                        for m in cadences["mission"]:
182                            # logger.debug(cadences.query("mission == @m")[
183                            #     "exptime"
184                            # ].values)
185                            sectorMatch = re.search(
186                                missionSectorRegExes[mission],
187                                m
188                            )
189                            if not sectorMatch:
190                                raise ValueError(
191                                    " ".join((
192                                        "Couldn't extract sector from",
193                                        f"this mission value: {m}"
194                                    ))
195                                )
196                            sector = sectorMatch.group(1)
197                            if not stats[mission][cadence]["by-sectors"].get(
198                                sector
199                            ):  # this sector hasn't been added yet
200                                stats[mission][cadence]["by-sectors"][
201                                    sector
202                                ] = {}
203                                # save the cadence/exptime too (assuming
204                                # that it is the same for every sector entry)
205                                stats[mission][cadence]["by-sectors"][sector][
206                                    "exptime"
207                                ] = cadences.query("mission == @m")[
208                                    "exptime"
209                                ].values[0]  # there must be a better way
210                            try:
211                                stats[mission][cadence][
212                                    "by-sectors"
213                                ][sector]["count"] += 1
214                            except KeyError:
215                                stats[mission][cadence][
216                                    "by-sectors"
217                                ][sector]["count"] = 1
218    return stats

Gather statistics about available cadence values for a given star.

If detailed is set to False, the function skips counting cadence values by sectors, so the resulting statistics contain only the total count of values.

Example:

from phab.utils.databases import lightcurves

stats = lightcurves.getLightCurveStats("Kepler-114")
if not stats:
    print("Didn't find any results for this star")
else:
    missionName = "Kepler"
    cadenceType = "long"
    sectors = stats.get(
        missionName,
        {}
    ).get(cadenceType)
    if sectors is None:
        print(
            " ".join((
                "There doesn't seem to be any sectors",
                f"with [{cadenceType}] cadence by [{missionName}]"
            ))
        )
    else:
        totalProperty = "total"
        sectorsCount = sectors.get(totalProperty)
        if sectorsCount is None:
            print(
                " ".join((
                    f"For some reason, the [{totalProperty}] property",
                    f"is missing from the [{cadenceType}] cadence",
                    f"collection by [{missionName}]"
                ))
            )
        else:
            print(
                " ".join((
                    f"Total amount of sectors with [{cadenceType}]",
                    f"cadence by [{missionName}]: {sectorsCount}",
                ))
            )
            bySectors = sectors.get("by-sectors")
            if bySectors is None:
                print(
                    " ".join((
                        "For some reason, the [by-sectors] property is missing",
                        f"from the [{cadenceType}] cadence collection",
                        f"by [{missionName}]"
                    ))
                )
            else:
                for s in bySectors:
                    print(f"- {s}: {bySectors[s]}")
def getLightCurveIDs(starName: str) -> Dict[str, List[str]]:
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Pick the missions and cadence kinds that are worth requesting
    for a given star, based on the total counts reported
    by `getLightCurveStats()` (called with `detailed=False`). The result
    can be passed, for instance, to `altaipony.lcio.from_mast()`.

    For every mission the cadence kinds are considered in the order
    `fast`, `short`, `long`, and only those with a non-zero total count
    are taken. The `long` cadence is not considered once `fast`
    or `short` has been taken, except for the `K2` mission. A mission
    without any non-zero count is still present in the result,
    with an empty list.

    Raises `ValueError` if no statistics were found for the star.

    Example:

    ``` py
    from phab.utils.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # ordered from the most important to the least important one;
    # the position in this list is what decides whether
    # a cadence may be dropped
    cadencePriority = ["fast", "short", "long"]

    lightCurveIDs: Dict[str, List[str]] = {}
    for mission, missionStats in stats.items():
        chosen: List[str] = []
        lightCurveIDs[mission] = chosen
        for rank, cadence in enumerate(cadencePriority):
            # ranks above 1 mean the long cadence: it is dropped as soon
            # as something faster has been chosen, unless the mission
            # is K2, whose long cadence matters the most regardless
            if chosen and rank > 1 and mission != "K2":
                break
            # a missing or zero total means there is nothing to request
            if cadence in missionStats and missionStats[cadence].get("total"):
                chosen.append(cadence)

    return lightCurveIDs

Based on the available cadence statistics for a given star, get the names of missions and cadences, for instance in order to pass them to altaipony.lcio.from_mast().

Example:

from phab.utils.databases import lightcurves
from altaipony.lcio import from_mast

starName = "LTT 1445 A"
lightCurveIDs = {}

try:
    lightCurveIDs = lightcurves.getLightCurveIDs(starName)
except ValueError as ex:
    print(f"Failed to get light curves missions and cadences. {ex}")
    raise
if not lightCurveIDs:
    raise ValueError("Didn't find any results for this star")
#print(lightCurveIDs)

for m in lightCurveIDs.keys():
    #print(f"Mission: {m}")
    for c in lightCurveIDs[m]:
        #print(f"- {c}")
        flc = from_mast(
            starName,
            mode="LC",
            cadence=c,
            mission=m
        )
        #print(flc)