Edit on GitHub

tasks.reconfirming_stellar_parameters

Reconfirming stellar parameters, enriching original datasets with additional data from various data sources.

  1"""
  2Reconfirming stellar parameters, enriching original datasets
  3with additional data from various data sources.
  4"""
  5
  6import pandas
  7import numpy
  8# import json
  9
 10from typing import Optional, List, Dict
 11
 12try:
 13    from ..utils.databases import tap, simbad
 14except ImportError:
 15    # two import styles are needed: the relative import is the one that
 16    # works when the package is installed, while generating documentation
 17    # with pdoc requires the absolute import below
 18    from utils.databases import tap, simbad
 19
 20
 21def lookForParametersInGaia(
 22    originalTable: pandas.DataFrame,
 23    adqlTable: str,
 24    adqlParameters: List[str],
 25    simbadIDversion: Optional[str] = None
 26) -> pandas.DataFrame:
 27    """
 28    Looking for specified parameters in GAIA database:
 29
 30    1. Opens a pickle file with original [Pandas](https://pandas.pydata.org)
 31    table;
 32    2. Extracts unique list of star names;
 33    3. Gets their GAIA IDs from Simbad database;
 34    4. Queries GAIA database for given parameters;
 35    5. Adds found parameters to the original table as new columns.
 36
 37    Example:
 38
 39    ``` py
 40    from phab.utils.files import pickle
 41    from phab.tasks import reconfirming_stellar_parameters
 42
 43    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
 44    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
 45        originalTable,
 46        "gaiadr3.astrophysical_parameters",
 47        [
 48            "age_flame",
 49            "logg_gspphot",
 50            "mass_flame",
 51            "mh_gspphot",
 52            "mh_gspspec",
 53            "radius_flame",
 54            "radius_gspphot",
 55            "teff_esphs",
 56            "teff_espucd",
 57            "teff_gspphot",
 58            "teff_gspspec",
 59            "teff_msc1",
 60            "ew_espels_halpha",
 61            "ew_espels_halpha_model"
 62        ],
 63        "dr3"
 64    )
 65    ```
 66
 67    You might need to provide `simbadIDversion` parameter (*the `dr3` value
 68    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
 69    returns IDs like `DR3 2135237601028549888` and you need to get exactly
 70    the DR3 ones.
 71
 72    As a result, your original table `tbl` will be enriched with additional
 73    columns according to the list of provided astrophysical parameters.
 74    """
 75
 76    starNames = originalTable["star_name"].unique()
 77
 78    print("\nGetting GAIA IDs from SIMBAD...\n")
 79
 80    stars: Dict[str, Optional[str]] = {}
 81    for star in starNames:
 82        oid = simbad.findIdentificatorFromAnotherCatalogue(
 83            star,
 84            "gaia",
 85            simbadIDversion
 86        )
 87        if oid is None:
 88            print(f"- [WARNING] did not GAIA ID for [{star}]")
 89        else:
 90            print(f"- found GAIA ID for [{star}]: {oid}")
 91            stars[star] = oid
 92
 93    # print(json.dumps(stars, indent=4))
 94
 95    print("\nLooking for parameters in GAIA...\n")
 96
 97    for parameter in adqlParameters:
 98        originalTable[parameter] = numpy.array(numpy.nan, dtype=float)
 99
100    foundCnt = 0
101    for star in stars:
102        gaiaID = stars[star]
103        print(f"- {star} | {gaiaID}...")
104        resultsGAIA = tap.queryService(
105            tap.getServiceEndpoint("gaia"),
106            " ".join((
107                f"SELECT {', '.join(adqlParameters)}",
108                f"FROM {adqlTable}",
109                f"WHERE source_id = {gaiaID}"
110            ))
111        )
112        if resultsGAIA is None:
113            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
114        else:
115            tbl = resultsGAIA.to_table().to_pandas()
116            foundCnt += 1
117            if len(tbl) > 1:
118                print(
119                    " ".join((
120                        "- [WARNING] GAIA has more than one record",
121                        f"for ID [{gaiaID}], will take only the first one"
122                    ))
123                )
124            # add found values to the new columns in the original table
125            for parameter in adqlParameters:
126                originalTable.loc[
127                    originalTable["star_name"] == star,
128                    parameter
129                ] = tbl.head(1)[parameter][0]
130
131    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")
132
133    return originalTable
def lookForParametersInGaia( originalTable: pandas.core.frame.DataFrame, adqlTable: str, adqlParameters: List[str], simbadIDversion: Optional[str] = None) -> pandas.core.frame.DataFrame:
 22def lookForParametersInGaia(
 23    originalTable: pandas.DataFrame,
 24    adqlTable: str,
 25    adqlParameters: List[str],
 26    simbadIDversion: Optional[str] = None
 27) -> pandas.DataFrame:
 28    """
 29    Looking for specified parameters in GAIA database:
 30
 31    1. Opens a pickle file with original [Pandas](https://pandas.pydata.org)
 32    table;
 33    2. Extracts unique list of star names;
 34    3. Gets their GAIA IDs from Simbad database;
 35    4. Queries GAIA database for given parameters;
 36    5. Adds found parameters to the original table as new columns.
 37
 38    Example:
 39
 40    ``` py
 41    from phab.utils.files import pickle
 42    from phab.tasks import reconfirming_stellar_parameters
 43
 44    originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
 45    tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
 46        originalTable,
 47        "gaiadr3.astrophysical_parameters",
 48        [
 49            "age_flame",
 50            "logg_gspphot",
 51            "mass_flame",
 52            "mh_gspphot",
 53            "mh_gspspec",
 54            "radius_flame",
 55            "radius_gspphot",
 56            "teff_esphs",
 57            "teff_espucd",
 58            "teff_gspphot",
 59            "teff_gspspec",
 60            "teff_msc1",
 61            "ew_espels_halpha",
 62            "ew_espels_halpha_model"
 63        ],
 64        "dr3"
 65    )
 66    ```
 67
 68    You might need to provide `simbadIDversion` parameter (*the `dr3` value
 69    here*) if SIMBAD (`utils.databases.simbad.findIdentificatorFromAnotherCatalogue`)
 70    returns IDs like `DR3 2135237601028549888` and you need to get exactly
 71    the DR3 ones.
 72
 73    As a result, your original table `tbl` will be enriched with additional
 74    columns according to the list of provided astrophysical parameters.
 75    """
 76
 77    starNames = originalTable["star_name"].unique()
 78
 79    print("\nGetting GAIA IDs from SIMBAD...\n")
 80
 81    stars: Dict[str, Optional[str]] = {}
 82    for star in starNames:
 83        oid = simbad.findIdentificatorFromAnotherCatalogue(
 84            star,
 85            "gaia",
 86            simbadIDversion
 87        )
 88        if oid is None:
 89            print(f"- [WARNING] did not GAIA ID for [{star}]")
 90        else:
 91            print(f"- found GAIA ID for [{star}]: {oid}")
 92            stars[star] = oid
 93
 94    # print(json.dumps(stars, indent=4))
 95
 96    print("\nLooking for parameters in GAIA...\n")
 97
 98    for parameter in adqlParameters:
 99        originalTable[parameter] = numpy.array(numpy.nan, dtype=float)
100
101    foundCnt = 0
102    for star in stars:
103        gaiaID = stars[star]
104        print(f"- {star} | {gaiaID}...")
105        resultsGAIA = tap.queryService(
106            tap.getServiceEndpoint("gaia"),
107            " ".join((
108                f"SELECT {', '.join(adqlParameters)}",
109                f"FROM {adqlTable}",
110                f"WHERE source_id = {gaiaID}"
111            ))
112        )
113        if resultsGAIA is None:
114            print(f"- [WARNING] did not found anything in GAIA for [{gaiaID}]")
115        else:
116            tbl = resultsGAIA.to_table().to_pandas()
117            foundCnt += 1
118            if len(tbl) > 1:
119                print(
120                    " ".join((
121                        "- [WARNING] GAIA has more than one record",
122                        f"for ID [{gaiaID}], will take only the first one"
123                    ))
124                )
125            # add found values to the new columns in the original table
126            for parameter in adqlParameters:
127                originalTable.loc[
128                    originalTable["star_name"] == star,
129                    parameter
130                ] = tbl.head(1)[parameter][0]
131
132    print(f"\nFound parameters for {foundCnt}/{len(stars)} stars\n")
133
134    return originalTable

Looking for specified parameters in GAIA database:

  1. Takes the original Pandas table (for example, one loaded from a pickle file);
  2. Extracts unique list of star names;
  3. Gets their GAIA IDs from Simbad database;
  4. Queries GAIA database for given parameters;
  5. Adds found parameters to the original table as new columns.

Example:

from phab.utils.files import pickle
from phab.tasks import reconfirming_stellar_parameters

originalTable = pickle.openPickleAsPandasTable("./data/systems-528n.pkl")
tbl = reconfirming_stellar_parameters.lookForParametersInGaia(
    originalTable,
    "gaiadr3.astrophysical_parameters",
    [
        "age_flame",
        "logg_gspphot",
        "mass_flame",
        "mh_gspphot",
        "mh_gspspec",
        "radius_flame",
        "radius_gspphot",
        "teff_esphs",
        "teff_espucd",
        "teff_gspphot",
        "teff_gspspec",
        "teff_msc1",
        "ew_espels_halpha",
        "ew_espels_halpha_model"
    ],
    "dr3"
)

You might need to provide simbadIDversion parameter (the dr3 value here) if SIMBAD (utils.databases.simbad.findIdentificatorFromAnotherCatalogue) returns IDs like DR3 2135237601028549888 and you need to get exactly the DR3 ones.

As a result, your original table tbl will be enriched with additional columns according to the list of provided astrophysical parameters.