Edit on GitHub

uio.utility.databases.lightcurves

Getting light curves data.

  1"""
  2Getting light curves data.
  3"""
  4
  5import lightkurve
  6import pandas
  7import re
  8
  9from typing import Optional, Dict, List, Pattern
 10
 11from ..logs.log import logger
 12
 13# apparently, one cannot set long/short threshold,
 14# hence this dictionary
 15#
 16# there are actually more authors available,
 17# but we are only interested in these
 18#
 19authors: Dict[str, Dict] = {
 20    "Kepler":
 21    {
 22        "mission": "Kepler",
 23        "cadence":
 24        {
 25            "long": [1800],
 26            "short": [60]
 27        }
 28    },
 29    "K2":
 30    {
 31        "mission": "K2",
 32        "cadence":
 33        {
 34            "long": [1800],
 35            "short": [60]
 36        }
 37    },
 38    "SPOC":
 39    {
 40        "mission": "TESS",
 41        "cadence":
 42        {
 43            "long": [600],
 44            "short": [120],
 45            "fast": [20]
 46        }
 47    },
 48    "TESS-SPOC":
 49    {
 50        "mission": "TESS",
 51        "cadence":
 52        {
 53            "long": []  # any cadence is long
 54        }
 55    }
 56}
 57"""
 58Dictionary of authors, their cadence values and mapping to missions.
 59"""
 60
 61missionSectorRegExes: Dict[str, Pattern] = {
 62    "Kepler": re.compile(
 63        r"^Kepler\s\w+\s(\d+)$"  # Kepler Quarter 15
 64    ),
 65    "K2": re.compile(
 66        r"^K2\s\w+\s(\d+)$"  # K2 Campaign 12
 67    ),
 68    "TESS": re.compile(
 69        r"^TESS\s\w+\s(\d+)$"  # TESS Sector 40
 70    )
 71}
 72"""
 73Dictionary of regular expressions for extracting sectors.
 74"""
 75
 76
 77def getLightCurveStats(
 78    starName: str,
 79    detailed: bool = True
 80) -> Dict[str, Dict]:
 81    """
 82    Gather statistics about available cadence values for a given star.
 83
 84    If `detailed` is set to `False`, then function will skip collecting
 85    cadence values count by sectors, so resulting statistics will only
 86    contain total count of values.
 87
 88    Example:
 89
 90    ``` py
 91    from uio.utility.databases import lightcurves
 92
 93    stats = lightcurves.getLightCurveStats("Kepler-114")
 94    if not stats:
 95        print("Didn't find any results for this star")
 96    else:
 97        missionName = "Kepler"
 98        cadenceType = "long"
 99        sectors = stats.get(
100            missionName,
101            {}
102        ).get(cadenceType)
103        if sectors is None:
104            print(
105                " ".join((
106                    "There doesn't seem to be any sectors",
107                    f"with [{cadenceType}] cadence by [{missionName}]"
108                ))
109            )
110        else:
111            totalProperty = "total"
112            sectorsCount = sectors.get(totalProperty)
113            if sectorsCount is None:
114                print(
115                    " ".join((
116                        f"For some reason, the [{totalProperty}] property",
117                        f"is missing from the [{cadenceType}] cadence",
118                        f"collection by [{missionName}]"
119                    ))
120                )
121            else:
122                print(
123                    " ".join((
124                        f"Total amount of sectors with [{cadenceType}]",
125                        f"cadence by [{missionName}]: {sectorsCount}",
126                    ))
127                )
128                bySectors = sectors.get("by-sectors")
129                if bySectors is None:
130                    print(
131                        " ".join((
132                            "For some reason, the [total] property is missing",
133                            f"from the [{cadenceType}] cadence collection",
134                            f"by [{missionName}]"
135                        ))
136                    )
137                else:
138                    for s in bySectors:
139                        print(f"- {s}: {bySectors[s]}")
140    ```
141    """
142    stats: Dict[str, Dict] = {}
143
144    lghtcrvs = lightkurve.search_lightcurve(
145        starName,
146        author=tuple(authors.keys())
147    )
148    if len(lghtcrvs) != 0:
149        tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
150            ["author", "exptime", "mission"]
151        ]
152        logger.debug(tbl)
153
154        for author, group in (tbl.groupby("author")):
155            if author not in authors:
156                raise ValueError(f"Unknown author: {author}")
157            mission = authors[author]["mission"]
158            if not stats.get(mission):
159                stats[mission] = {}
160            for cadence in ["long", "short", "fast"]:
161                if cadence in authors[author]["cadence"]:
162                    stats[mission][cadence] = {}
163                    cadenceValues: List[int] = (
164                        authors[author]["cadence"][cadence]
165                    )
166                    cadences: pandas.DataFrame = None
167                    if len(cadenceValues) > 0:  # take only specified values
168                        # perhaps both of these should be normalized to int
169                        cadences = group.query("exptime == @cadenceValues")
170                    else:  # any value is good
171                        cadences = group
172
173                    # total count
174                    stats[mission][cadence]["total"] = len(cadences)
175
176                    if detailed:
177                        # count by sectors
178                        stats[mission][cadence]["by-sectors"] = {}
179                        for m in cadences["mission"]:
180                            # logger.debug(cadences.query("mission == @m")[
181                            #     "exptime"
182                            # ].values)
183                            sectorMatch = re.search(
184                                missionSectorRegExes[mission],
185                                m
186                            )
187                            if not sectorMatch:
188                                raise ValueError(
189                                    " ".join((
190                                        "Couldn't extract sector from",
191                                        f"this mission value: {m}"
192                                    ))
193                                )
194                            sector = sectorMatch.group(1)
195                            if not stats[mission][cadence]["by-sectors"].get(
196                                sector
197                            ):  # this sector hasn't been added yet
198                                stats[mission][cadence]["by-sectors"][
199                                    sector
200                                ] = {}
201                                # save the cadence/exptime too (assuming
202                                # that it is the same for every sector entry)
203                                stats[mission][cadence]["by-sectors"][sector][
204                                    "exptime"
205                                ] = cadences.query("mission == @m")[
206                                    "exptime"
207                                ].values[0]  # there must be a better way
208                            try:
209                                stats[mission][cadence][
210                                    "by-sectors"
211                                ][sector]["count"] += 1
212                            except KeyError:
213                                stats[mission][cadence][
214                                    "by-sectors"
215                                ][sector]["count"] = 1
216    return stats
217
218
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Choose, per mission, which cadence types are worth requesting
    for a given star, based on the totals reported by
    `getLightCurveStats()` (called with `detailed=False`). The result
    can be used, for instance, to pass mission and cadence names
    to `altaipony.lcio.from_mast()`.

    Cadence types are considered in the order `fast`, `short`, `long`,
    and only those with a non-zero total are taken. The `long` cadence
    is skipped if `fast` or `short` has already been taken, except for
    the `K2` mission, where `long` is always considered.

    Raises `ValueError` if no statistics were found for the star.

    Example:

    ``` py
    from uio.utility.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # ordered from most important to least important; positions
    # after the second one (that is, long cadence) are only used
    # as a fallback
    cadencePriority = ["fast", "short", "long"]

    lightCurveIDs: Dict[str, List[str]] = {}
    for missionName, missionStats in stats.items():
        chosen: List[str] = []
        lightCurveIDs[missionName] = chosen
        for rank, cadenceName in enumerate(cadencePriority):
            # once fast or short cadence has been taken, long cadence
            # is of no interest (except for mission K2, because its
            # long cadence is what's most important even if there are
            # also fast and short ones)
            if rank > 1 and missionName != "K2" and any(chosen):
                break
            if cadenceName not in missionStats:
                continue
            # a missing or zero total means there is nothing to take
            if missionStats[cadenceName].get("total"):
                chosen.append(cadenceName)

    return lightCurveIDs
# Lookup table: author name -> {"mission": <mission name>,
# "cadence": {<cadence type>: [<exptime values accepted for that type>]}}.
# An empty list (TESS-SPOC "long") means that any exptime value is accepted.
authors: Dict[str, Dict] = {'Kepler': {'mission': 'Kepler', 'cadence': {'long': [1800], 'short': [60]}}, 'K2': {'mission': 'K2', 'cadence': {'long': [1800], 'short': [60]}}, 'SPOC': {'mission': 'TESS', 'cadence': {'long': [600], 'short': [120], 'fast': [20]}}, 'TESS-SPOC': {'mission': 'TESS', 'cadence': {'long': []}}}

Dictionary of authors, their cadence values and mapping to missions.

# One compiled pattern per mission name; the first group captures the
# trailing number of values such as "Kepler Quarter 15" or "TESS Sector 40".
missionSectorRegExes: Dict[str, Pattern] = {'Kepler': re.compile('^Kepler\\s\\w+\\s(\\d+)$'), 'K2': re.compile('^K2\\s\\w+\\s(\\d+)$'), 'TESS': re.compile('^TESS\\s\\w+\\s(\\d+)$')}

Dictionary of regular expressions for extracting sectors.

def getLightCurveStats(starName: str, detailed: bool = True) -> Dict[str, Dict]:
 78def getLightCurveStats(
 79    starName: str,
 80    detailed: bool = True
 81) -> Dict[str, Dict]:
 82    """
 83    Gather statistics about available cadence values for a given star.
 84
 85    If `detailed` is set to `False`, then function will skip collecting
 86    cadence values count by sectors, so resulting statistics will only
 87    contain total count of values.
 88
 89    Example:
 90
 91    ``` py
 92    from uio.utility.databases import lightcurves
 93
 94    stats = lightcurves.getLightCurveStats("Kepler-114")
 95    if not stats:
 96        print("Didn't find any results for this star")
 97    else:
 98        missionName = "Kepler"
 99        cadenceType = "long"
100        sectors = stats.get(
101            missionName,
102            {}
103        ).get(cadenceType)
104        if sectors is None:
105            print(
106                " ".join((
107                    "There doesn't seem to be any sectors",
108                    f"with [{cadenceType}] cadence by [{missionName}]"
109                ))
110            )
111        else:
112            totalProperty = "total"
113            sectorsCount = sectors.get(totalProperty)
114            if sectorsCount is None:
115                print(
116                    " ".join((
117                        f"For some reason, the [{totalProperty}] property",
118                        f"is missing from the [{cadenceType}] cadence",
119                        f"collection by [{missionName}]"
120                    ))
121                )
122            else:
123                print(
124                    " ".join((
125                        f"Total amount of sectors with [{cadenceType}]",
126                        f"cadence by [{missionName}]: {sectorsCount}",
127                    ))
128                )
129                bySectors = sectors.get("by-sectors")
130                if bySectors is None:
131                    print(
132                        " ".join((
133                            "For some reason, the [total] property is missing",
134                            f"from the [{cadenceType}] cadence collection",
135                            f"by [{missionName}]"
136                        ))
137                    )
138                else:
139                    for s in bySectors:
140                        print(f"- {s}: {bySectors[s]}")
141    ```
142    """
143    stats: Dict[str, Dict] = {}
144
145    lghtcrvs = lightkurve.search_lightcurve(
146        starName,
147        author=tuple(authors.keys())
148    )
149    if len(lghtcrvs) != 0:
150        tbl: pandas.DataFrame = lghtcrvs.table.to_pandas()[
151            ["author", "exptime", "mission"]
152        ]
153        logger.debug(tbl)
154
155        for author, group in (tbl.groupby("author")):
156            if author not in authors:
157                raise ValueError(f"Unknown author: {author}")
158            mission = authors[author]["mission"]
159            if not stats.get(mission):
160                stats[mission] = {}
161            for cadence in ["long", "short", "fast"]:
162                if cadence in authors[author]["cadence"]:
163                    stats[mission][cadence] = {}
164                    cadenceValues: List[int] = (
165                        authors[author]["cadence"][cadence]
166                    )
167                    cadences: pandas.DataFrame = None
168                    if len(cadenceValues) > 0:  # take only specified values
169                        # perhaps both of these should be normalized to int
170                        cadences = group.query("exptime == @cadenceValues")
171                    else:  # any value is good
172                        cadences = group
173
174                    # total count
175                    stats[mission][cadence]["total"] = len(cadences)
176
177                    if detailed:
178                        # count by sectors
179                        stats[mission][cadence]["by-sectors"] = {}
180                        for m in cadences["mission"]:
181                            # logger.debug(cadences.query("mission == @m")[
182                            #     "exptime"
183                            # ].values)
184                            sectorMatch = re.search(
185                                missionSectorRegExes[mission],
186                                m
187                            )
188                            if not sectorMatch:
189                                raise ValueError(
190                                    " ".join((
191                                        "Couldn't extract sector from",
192                                        f"this mission value: {m}"
193                                    ))
194                                )
195                            sector = sectorMatch.group(1)
196                            if not stats[mission][cadence]["by-sectors"].get(
197                                sector
198                            ):  # this sector hasn't been added yet
199                                stats[mission][cadence]["by-sectors"][
200                                    sector
201                                ] = {}
202                                # save the cadence/exptime too (assuming
203                                # that it is the same for every sector entry)
204                                stats[mission][cadence]["by-sectors"][sector][
205                                    "exptime"
206                                ] = cadences.query("mission == @m")[
207                                    "exptime"
208                                ].values[0]  # there must be a better way
209                            try:
210                                stats[mission][cadence][
211                                    "by-sectors"
212                                ][sector]["count"] += 1
213                            except KeyError:
214                                stats[mission][cadence][
215                                    "by-sectors"
216                                ][sector]["count"] = 1
217    return stats

Gather statistics about available cadence values for a given star.

If detailed is set to False, the function skips collecting cadence value counts by sectors, so the resulting statistics contain only the total count of values.

Example:

from uio.utility.databases import lightcurves

stats = lightcurves.getLightCurveStats("Kepler-114")
if not stats:
    print("Didn't find any results for this star")
else:
    missionName = "Kepler"
    cadenceType = "long"
    sectors = stats.get(
        missionName,
        {}
    ).get(cadenceType)
    if sectors is None:
        print(
            " ".join((
                "There doesn't seem to be any sectors",
                f"with [{cadenceType}] cadence by [{missionName}]"
            ))
        )
    else:
        totalProperty = "total"
        sectorsCount = sectors.get(totalProperty)
        if sectorsCount is None:
            print(
                " ".join((
                    f"For some reason, the [{totalProperty}] property",
                    f"is missing from the [{cadenceType}] cadence",
                    f"collection by [{missionName}]"
                ))
            )
        else:
            print(
                " ".join((
                    f"Total amount of sectors with [{cadenceType}]",
                    f"cadence by [{missionName}]: {sectorsCount}",
                ))
            )
            bySectors = sectors.get("by-sectors")
            if bySectors is None:
                print(
                    " ".join((
                        "For some reason, the [by-sectors] property is missing",
                        f"from the [{cadenceType}] cadence collection",
                        f"by [{missionName}]"
                    ))
                )
            else:
                for s in bySectors:
                    print(f"- {s}: {bySectors[s]}")
def getLightCurveIDs(starName: str) -> Dict[str, List[str]]:
def getLightCurveIDs(
    starName: str
) -> Dict[str, List[str]]:
    """
    Choose, per mission, which cadence types are worth requesting
    for a given star, based on the totals reported by
    `getLightCurveStats()` (called with `detailed=False`). The result
    can be used, for instance, to pass mission and cadence names
    to `altaipony.lcio.from_mast()`.

    Cadence types are considered in the order `fast`, `short`, `long`,
    and only those with a non-zero total are taken. The `long` cadence
    is skipped if `fast` or `short` has already been taken, except for
    the `K2` mission, where `long` is always considered.

    Raises `ValueError` if no statistics were found for the star.

    Example:

    ``` py
    from uio.utility.databases import lightcurves
    from altaipony.lcio import from_mast

    starName = "LTT 1445 A"
    lightCurveIDs = {}

    try:
        lightCurveIDs = lightcurves.getLightCurveIDs(starName)
    except ValueError as ex:
        print(f"Failed to get light curves missions and cadences. {ex}")
        raise
    if not lightCurveIDs:
        raise ValueError("Didn't find any results for this star")
    #print(lightCurveIDs)

    for m in lightCurveIDs.keys():
        #print(f"Mission: {m}")
        for c in lightCurveIDs[m]:
            #print(f"- {c}")
            flc = from_mast(
                starName,
                mode="LC",
                cadence=c,
                mission=m
            )
            #print(flc)
    ```
    """
    stats: Dict[str, Dict] = getLightCurveStats(
        starName,
        detailed=False
    )
    if not stats:
        raise ValueError("Didn't find any results for this star")

    # ordered from most important to least important; positions
    # after the second one (that is, long cadence) are only used
    # as a fallback
    cadencePriority = ["fast", "short", "long"]

    lightCurveIDs: Dict[str, List[str]] = {}
    for missionName, missionStats in stats.items():
        chosen: List[str] = []
        lightCurveIDs[missionName] = chosen
        for rank, cadenceName in enumerate(cadencePriority):
            # once fast or short cadence has been taken, long cadence
            # is of no interest (except for mission K2, because its
            # long cadence is what's most important even if there are
            # also fast and short ones)
            if rank > 1 and missionName != "K2" and any(chosen):
                break
            if cadenceName not in missionStats:
                continue
            # a missing or zero total means there is nothing to take
            if missionStats[cadenceName].get("total"):
                chosen.append(cadenceName)

    return lightCurveIDs

Based on the available cadence value statistics for a given star, get the names of missions and cadences, for instance in order to pass them to altaipony.lcio.from_mast().

Example:

from uio.utility.databases import lightcurves
from altaipony.lcio import from_mast

starName = "LTT 1445 A"
lightCurveIDs = {}

try:
    lightCurveIDs = lightcurves.getLightCurveIDs(starName)
except ValueError as ex:
    print(f"Failed to get light curves missions and cadences. {ex}")
    raise
if not lightCurveIDs:
    raise ValueError("Didn't find any results for this star")
#print(lightCurveIDs)

for m in lightCurveIDs.keys():
    #print(f"Mission: {m}")
    for c in lightCurveIDs[m]:
        #print(f"- {c}")
        flc = from_mast(
            starName,
            mode="LC",
            cadence=c,
            mission=m
        )
        #print(flc)