Edit on GitHub

tasks.reconfirming_stellar_parameters

Reconfirming stellar parameters, enriching original datasets with additional data from various data sources.

  1"""
  2Reconfirming stellar parameters, enriching original datasets
  3with additional data from various data sources.
  4"""
  5
  6import pandas
  7import numpy
  8# import json
  9
 10from typing import Optional, List, Dict
 11
 12try:
 13    from ..utils.files import pickle
 14    from ..utils.databases import tap, simbad
 15except ImportError:
 16    # when used as an installed package, these imports need to be
 17    # relative, but for generating documentation with pdoc they need
 18    # to be absolute, hence this fallback
 19    from utils.files import pickle
 20    from utils.databases import tap, simbad
 21
 22
 23def lookForParametersInGaia(
 24    originalTable: pandas.DataFrame,
 25    adqlTable: str,
 26    adqlParameters: List[str],
 27    simbadIDversion: Optional[str] = None
 28) -> pandas.DataFrame:
 29    """
 30    Looking for specified parameters in GAIA database:
 31
 32    1. Opens a pickle file with original [Pandas](https://pandas.pydata.org)
 33    table;
 34    2. Extracts unique list of star names;
 35    3. Gets their GAIA IDs from Simbad database;
 36    4. Queries GAIA database for given parameters;
 37    5. Adds found parameters to the original table as new columns.
 38
 39    Example:
 40
 41    ``` py
 42    from phab.utils.files import pickle
 43    from phab.tasks import reconfirming_stellar_parameters
 44
 45    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
 46    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
 47        originalTable,
 48        "gaiadr3.astrophysical_parameters",
 49        [
 50            "age_flame",
 51            "logg_gspphot",
 52            "mass_flame",
 53            "mh_gspphot",
 54            "mh_gspspec",
 55            "radius_flame",
 56            "radius_gspphot",
 57            "teff_esphs",
 58            "teff_espucd",
 59            "teff_gspphot",
 60            "teff_gspspec",
 61            "teff_msc1",
 62            "ew_espels_halpha",
 63            "ew_espels_halpha_model"
 64        ],
 65        "dr3"
 66    )
 67    ```
 68
 69    You might need to provide `simbadIDversion` parameter (*the `dr3` value
 70    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
 71    returns IDs like `DR3 2135237601028549888` and you need to get exactly
 72    the DR3 ones.
 73
 74    As a result, your original table `tbl` will be enriched with additional
 75    columns according to the list of provided astrophysical parameters.
 76    """
 77
 78    starNames = originalTable["star_name"].unique()
 79
 80    print("\nGetting GAIA IDs from SIMBAD...\n")
 81
 82    stars: Dict[str, Optional[str]] = {}
 83    for star in starNames:
 84        oid = simbad.findIdentificatorFromAnotherCatalogue(
 85            star,
 86            "gaia",
 87            simbadIDversion
 88        )
 89        if oid is None:
 90            print(f"- [WARNING] did not GAIA ID for [{star}]")
 91        else:
 92            print(f"- found GAIA ID for [{star}]: {oid}")
 93            stars[star] = oid
 94
 95    # print(json.dumps(stars, indent=4))
 96
 97    print("\nLooking for parameters in GAIA...\n")
 98
 99    for parameter in adqlParameters:
100        originalTable[parameter] = numpy.array(numpy.nan, dtype=float)
101
102    foundCnt = 0
103    for star in stars:
104        gaiaID = stars[star]
105        print(f"- {star} | {gaiaID}...")
106        resultsGAIA = tap.queryService(
107            tap.getServiceEndpoint("gaia"),
108            " ".join((
109                f"SELECT {', '.join(adqlParameters)}",
110                f"FROM {adqlTable}",
111                f"WHERE source_id = {gaiaID}"
112            ))
113        )
114        if resultsGAIA is None:
115            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
116        else:
117            tbl = resultsGAIA.to_table().to_pandas()
118            foundCnt += 1
119            if len(tbl) > 1:
120                print(
121                    " ".join((
122                        "- [WARNING] GAIA has more than one record",
123                        f"for ID [{gaiaID}], will take only the first one"
124                    ))
125                )
126            # add found values to the new columns in the original table
127            for parameter in adqlParameters:
128                originalTable.loc[
129                    originalTable["star_name"] == star,
130                    parameter
131                ] = tbl.head(1)[parameter][0]
132
133    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")
134
135    return originalTable
def lookForParametersInGaia( originalTable: pandas.core.frame.DataFrame, adqlTable: str, adqlParameters: List[str], simbadIDversion: Optional[str] = None) -> pandas.core.frame.DataFrame:
 24def lookForParametersInGaia(
 25    originalTable: pandas.DataFrame,
 26    adqlTable: str,
 27    adqlParameters: List[str],
 28    simbadIDversion: Optional[str] = None
 29) -> pandas.DataFrame:
 30    """
 31    Looking for specified parameters in GAIA database:
 32
 33    1. Opens a pickle file with original [Pandas](https://pandas.pydata.org)
 34    table;
 35    2. Extracts unique list of star names;
 36    3. Gets their GAIA IDs from Simbad database;
 37    4. Queries GAIA database for given parameters;
 38    5. Adds found parameters to the original table as new columns.
 39
 40    Example:
 41
 42    ``` py
 43    from phab.utils.files import pickle
 44    from phab.tasks import reconfirming_stellar_parameters
 45
 46    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
 47    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
 48        originalTable,
 49        "gaiadr3.astrophysical_parameters",
 50        [
 51            "age_flame",
 52            "logg_gspphot",
 53            "mass_flame",
 54            "mh_gspphot",
 55            "mh_gspspec",
 56            "radius_flame",
 57            "radius_gspphot",
 58            "teff_esphs",
 59            "teff_espucd",
 60            "teff_gspphot",
 61            "teff_gspspec",
 62            "teff_msc1",
 63            "ew_espels_halpha",
 64            "ew_espels_halpha_model"
 65        ],
 66        "dr3"
 67    )
 68    ```
 69
 70    You might need to provide `simbadIDversion` parameter (*the `dr3` value
 71    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
 72    returns IDs like `DR3 2135237601028549888` and you need to get exactly
 73    the DR3 ones.
 74
 75    As a result, your original table `tbl` will be enriched with additional
 76    columns according to the list of provided astrophysical parameters.
 77    """
 78
 79    starNames = originalTable["star_name"].unique()
 80
 81    print("\nGetting GAIA IDs from SIMBAD...\n")
 82
 83    stars: Dict[str, Optional[str]] = {}
 84    for star in starNames:
 85        oid = simbad.findIdentificatorFromAnotherCatalogue(
 86            star,
 87            "gaia",
 88            simbadIDversion
 89        )
 90        if oid is None:
 91            print(f"- [WARNING] did not GAIA ID for [{star}]")
 92        else:
 93            print(f"- found GAIA ID for [{star}]: {oid}")
 94            stars[star] = oid
 95
 96    # print(json.dumps(stars, indent=4))
 97
 98    print("\nLooking for parameters in GAIA...\n")
 99
100    for parameter in adqlParameters:
101        originalTable[parameter] = numpy.array(numpy.nan, dtype=float)
102
103    foundCnt = 0
104    for star in stars:
105        gaiaID = stars[star]
106        print(f"- {star} | {gaiaID}...")
107        resultsGAIA = tap.queryService(
108            tap.getServiceEndpoint("gaia"),
109            " ".join((
110                f"SELECT {', '.join(adqlParameters)}",
111                f"FROM {adqlTable}",
112                f"WHERE source_id = {gaiaID}"
113            ))
114        )
115        if resultsGAIA is None:
116            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
117        else:
118            tbl = resultsGAIA.to_table().to_pandas()
119            foundCnt += 1
120            if len(tbl) > 1:
121                print(
122                    " ".join((
123                        "- [WARNING] GAIA has more than one record",
124                        f"for ID [{gaiaID}], will take only the first one"
125                    ))
126                )
127            # add found values to the new columns in the original table
128            for parameter in adqlParameters:
129                originalTable.loc[
130                    originalTable["star_name"] == star,
131                    parameter
132                ] = tbl.head(1)[parameter][0]
133
134    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")
135
136    return originalTable

Looking for specified parameters in GAIA database:

  1. Takes the original Pandas table (for instance, one opened from a pickle file);
  2. Extracts unique list of star names;
  3. Gets their GAIA IDs from Simbad database;
  4. Queries GAIA database for given parameters;
  5. Adds found parameters to the original table as new columns.

Example:

from phab.utils.files import pickle
from phab.tasks import reconfirming_stellar_parameters

originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
    originalTable,
    "gaiadr3.astrophysical_parameters",
    [
        "age_flame",
        "logg_gspphot",
        "mass_flame",
        "mh_gspphot",
        "mh_gspspec",
        "radius_flame",
        "radius_gspphot",
        "teff_esphs",
        "teff_espucd",
        "teff_gspphot",
        "teff_gspspec",
        "teff_msc1",
        "ew_espels_halpha",
        "ew_espels_halpha_model"
    ],
    "dr3"
)

You might need to provide simbadIDversion parameter (the dr3 value here) if SIMBAD (utils.databases.simbad.findIdentificatorFromAnotherCatalogue) returns IDs like DR3 2135237601028549888 and you need to get exactly the DR3 ones.

As a result, the returned table tbl (which is the same object as the original table, modified in place) will be enriched with additional columns according to the list of provided astrophysical parameters.