Edit on GitHub

uio.utility.strings.extraction

 1import re
 2
 3from typing import Optional
 4
 5regexRefNASA = re.compile(r"<a refstr=.* href=.*\/abs\/(.*)\/abstract.*<\/a>")
 6"""
 7Regular expression for extracting short reference value from the full
 8reference string in NASA database.
 9"""
10
11
12def adsRefFromFullReferenceNASA(fullRefValue: str) -> Optional[str]:
13    """
14    Extract just the reference value from the full reference string.
15
16    Example:
17
18    ``` py
19    from uio.utility.strings import extraction
20
21    val = extraction.adsRefFromFullReferenceNASA(
22        "<a refstr=BORSATO_ET_AL__2014 href=https://ui.adsabs.harvard.edu/abs/2014A&A...571A..38B/abstract target=ref>Borsato et al. 2014</a>"
23    )
24    print(val)
25    ```
26    """
27    refMatch = re.search(regexRefNASA, fullRefValue)
28    if refMatch and not len(refMatch.groups()) < 1:
29        return refMatch.group(1)
30    else:
31        return None
regexRefNASA = re.compile('<a refstr=.* href=.*\\/abs\\/(.*)\\/abstract.*<\\/a>')

Regular expression for extracting the short reference value from the full reference string in the NASA database.

def adsRefFromFullReferenceNASA(fullRefValue: str) -> Optional[str]:
def adsRefFromFullReferenceNASA(fullRefValue: str) -> Optional[str]:
    """
    Extract just the reference value from the full reference string.

    The value is whatever `regexRefNASA` captures in its only group.

    Returns the captured reference value, or `None` if `fullRefValue`
    does not match `regexRefNASA`.

    Example:

    ``` py
    from uio.utility.strings import extraction

    val = extraction.adsRefFromFullReferenceNASA(
        "<a refstr=BORSATO_ET_AL__2014 href=https://ui.adsabs.harvard.edu/abs/2014A&A...571A..38B/abstract target=ref>Borsato et al. 2014</a>"
    )
    print(val)  # 2014A&A...571A..38B
    ```
    """
    refMatch = regexRefNASA.search(fullRefValue)
    if refMatch is None:
        return None
    # the pattern has exactly one capture group, so a successful match
    # always provides group(1) and no group count check is needed
    return refMatch.group(1)

Extract just the reference value from the full reference string.

Example:

from uio.utility.strings import extraction

val = extraction.adsRefFromFullReferenceNASA(
    "<a refstr=BORSATO_ET_AL__2014 href=https://ui.adsabs.harvard.edu/abs/2014A&A...571A..38B/abstract target=ref>Borsato et al. 2014</a>"
)
print(val)