Edit on GitHub


Reconfirming stellar parameters, enriching original datasets with additional data from various data sources.

  2Reconfirming stellar parameters, enriching original datasets
  3with additional data from various data sources.
  6import pandas
  7import numpy
  8# import json
 10from typing import Optional, List, Dict
 12from utils.files import pickle
 13from utils.databases import tap
 14from utils.databases import simbad
 17def lookForParametersInGaia(
 18    originalTable: pandas.DataFrame,
 19    adqlTable: str,
 20    adqlParameters: List[str],
 21    simbadIDversion: Optional[str] = None
 22) -> pandas.DataFrame:
 23    """
 24    Looking for specified parameters in GAIA database:
 26    1. Opens a pickle file with original [Pandas](https://pandas.pydata.org) table;
 27    2. Extracts unique list of star names;
 28    3. Gets their GAIA IDs from Simbad database;
 29    4. Queries GAIA database for given parameters;
 30    5. Adds found parameters to the original table as new columns.
 32    Example:
 34    ``` py
 35    from phab.utils.files import pickle
 36    from phab.tasks import reconfirming_stellar_parameters
 38    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
 39    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
 40        originalTable,
 41        "gaiadr3.astrophysical_parameters",
 42        [
 43            "age_flame",
 44            "logg_gspphot",
 45            "mass_flame",
 46            "mh_gspphot",
 47            "mh_gspspec",
 48            "radius_flame",
 49            "radius_gspphot",
 50            "teff_esphs",
 51            "teff_espucd",
 52            "teff_gspphot",
 53            "teff_gspspec",
 54            "teff_msc1",
 55            "ew_espels_halpha",
 56            "ew_espels_halpha_model"
 57        ],
 58        "dr3"
 59    )
 60    ```
 62    You might need to provide `simbadIDversion` parameter (*the `dr3` value
 63    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
 64    returns IDs like `DR3 2135237601028549888` and you need to get exactly
 65    the DR3 ones.
 67    As a result, your original table `tbl` will be enriched with additional
 68    columns according to the list of provided astrophysical parameters.
 69    """
 71    starNames = originalTable["star_name"].unique()
 73    print("\nGetting GAIA IDs from SIMBAD...\n")
 75    stars: Dict[str, Optional[str]] = {}
 76    for star in starNames:
 77        oid = simbad.findIdentificatorFromAnotherCatalogue(star, "gaia", simbadIDversion)
 78        if oid is None:
 79            print(f"- [WARNING] did not GAIA ID for [{star}]")
 80        else:
 81            print(f"- found GAIA ID for [{star}]: {oid}")
 82            stars[star] = oid
 84    # print(json.dumps(stars, indent=4))
 86    print("\nLooking for parameters in GAIA...\n")
 88    for parameter in adqlParameters:
 89        originalTable[parameter] = numpy.array(numpy.NaN, dtype=float)
 91    foundCnt = 0
 92    for star in stars:
 93        gaiaID = stars[star]
 94        print(f"- {star} | {gaiaID}...")
 95        resultsGAIA = tap.queryService(
 96            tap.getServiceEndpoint("gaia"),
 97            " ".join((
 98                f"SELECT {', '.join(adqlParameters)}",
 99                f"FROM {adqlTable}",
100                f"WHERE source_id = {gaiaID}"
101            ))
102        )
103        if resultsGAIA is None:
104            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
105        else:
106            tbl = resultsGAIA.to_table().to_pandas()
107            foundCnt += 1
108            if len(tbl) > 1:
109                print(
110                    " ".join((
111                        "- [WARNING] GAIA has more than one record",
112                        f"for ID [{gaiaID}], will take only the first one"
113                    ))
114                )
115            # add found values to the new columns in the original table
116            for parameter in adqlParameters:
117                originalTable.loc[
118                    originalTable["star_name"] == star,
119                    parameter
120                ] = tbl.head(1)[parameter][0]
122    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")
124    return originalTable
def lookForParametersInGaia( originalTable: pandas.core.frame.DataFrame, adqlTable: str, adqlParameters: List[str], simbadIDversion: Optional[str] = None) -> pandas.core.frame.DataFrame:
 18def lookForParametersInGaia(
 19    originalTable: pandas.DataFrame,
 20    adqlTable: str,
 21    adqlParameters: List[str],
 22    simbadIDversion: Optional[str] = None
 23) -> pandas.DataFrame:
 24    """
 25    Looking for specified parameters in GAIA database:
 27    1. Opens a pickle file with original [Pandas](https://pandas.pydata.org) table;
 28    2. Extracts unique list of star names;
 29    3. Gets their GAIA IDs from Simbad database;
 30    4. Queries GAIA database for given parameters;
 31    5. Adds found parameters to the original table as new columns.
 33    Example:
 35    ``` py
 36    from phab.utils.files import pickle
 37    from phab.tasks import reconfirming_stellar_parameters
 39    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
 40    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
 41        originalTable,
 42        "gaiadr3.astrophysical_parameters",
 43        [
 44            "age_flame",
 45            "logg_gspphot",
 46            "mass_flame",
 47            "mh_gspphot",
 48            "mh_gspspec",
 49            "radius_flame",
 50            "radius_gspphot",
 51            "teff_esphs",
 52            "teff_espucd",
 53            "teff_gspphot",
 54            "teff_gspspec",
 55            "teff_msc1",
 56            "ew_espels_halpha",
 57            "ew_espels_halpha_model"
 58        ],
 59        "dr3"
 60    )
 61    ```
 63    You might need to provide `simbadIDversion` parameter (*the `dr3` value
 64    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
 65    returns IDs like `DR3 2135237601028549888` and you need to get exactly
 66    the DR3 ones.
 68    As a result, your original table `tbl` will be enriched with additional
 69    columns according to the list of provided astrophysical parameters.
 70    """
 72    starNames = originalTable["star_name"].unique()
 74    print("\nGetting GAIA IDs from SIMBAD...\n")
 76    stars: Dict[str, Optional[str]] = {}
 77    for star in starNames:
 78        oid = simbad.findIdentificatorFromAnotherCatalogue(star, "gaia", simbadIDversion)
 79        if oid is None:
 80            print(f"- [WARNING] did not GAIA ID for [{star}]")
 81        else:
 82            print(f"- found GAIA ID for [{star}]: {oid}")
 83            stars[star] = oid
 85    # print(json.dumps(stars, indent=4))
 87    print("\nLooking for parameters in GAIA...\n")
 89    for parameter in adqlParameters:
 90        originalTable[parameter] = numpy.array(numpy.NaN, dtype=float)
 92    foundCnt = 0
 93    for star in stars:
 94        gaiaID = stars[star]
 95        print(f"- {star} | {gaiaID}...")
 96        resultsGAIA = tap.queryService(
 97            tap.getServiceEndpoint("gaia"),
 98            " ".join((
 99                f"SELECT {', '.join(adqlParameters)}",
100                f"FROM {adqlTable}",
101                f"WHERE source_id = {gaiaID}"
102            ))
103        )
104        if resultsGAIA is None:
105            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
106        else:
107            tbl = resultsGAIA.to_table().to_pandas()
108            foundCnt += 1
109            if len(tbl) > 1:
110                print(
111                    " ".join((
112                        "- [WARNING] GAIA has more than one record",
113                        f"for ID [{gaiaID}], will take only the first one"
114                    ))
115                )
116            # add found values to the new columns in the original table
117            for parameter in adqlParameters:
118                originalTable.loc[
119                    originalTable["star_name"] == star,
120                    parameter
121                ] = tbl.head(1)[parameter][0]
123    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")
125    return originalTable

Looking for specified parameters in GAIA database:

  1. Takes the original Pandas table (for example, one opened from a pickle file);
  2. Extracts unique list of star names;
  3. Gets their GAIA IDs from Simbad database;
  4. Queries GAIA database for given parameters;
  5. Adds found parameters to the original table as new columns.


from phab.utils.files import pickle
from phab.tasks import reconfirming_stellar_parameters

originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
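tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
    originalTable,
    "gaiadr3.astrophysical_parameters",
    [
        "age_flame",
        "logg_gspphot",
        "mass_flame",
        "mh_gspphot",
        "mh_gspspec",
        "radius_flame",
        "radius_gspphot",
        "teff_esphs",
        "teff_espucd",
        "teff_gspphot",
        "teff_gspspec",
        "teff_msc1",
        "ew_espels_halpha",
        "ew_espels_halpha_model"
    ],
    "dr3"
)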

You might need to provide the `simbadIDversion` parameter (the `dr3` value here) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`) returns IDs like `DR3 2135237601028549888` and you need to get exactly the DR3 ones.

As a result, your original table (returned as `tbl` in the example above) will be enriched with additional columns, one for each of the provided astrophysical parameters.