Aleksandar Shulevski · 9645b4c5
--- a/apub/apub.py

+ 208

− 11
+++ b/apub/apub.py

+ 208

− 11
 @@ -4,6 +4,7 @@ from __future__ import print_function, division, unicode_literals

 # Standard library
 import os
+import glob
 import re
 import sys
 import json
 @@ -26,6 +27,7 @@ from astropy import log
 from astropy.utils.console import ProgressBar

 from . import plot, PACKAGEDIR, INSTRUMENT, SCIENCE
+from matplotlib import pyplot as pl

 # Where is the default location of the SQLite database?
 DEFAULT_DB = os.path.expanduser("data/.apub.db")
 @@ -264,7 +266,7 @@ class PublicationDB(object):
                where += " AND year IN (" + ", ".join(str_year) + ")"
            else:
                where += " AND year = '{}' ".format(year)
-
+        
        cur = self.con.execute("SELECT year, month, metrics, bibcode "
                               "FROM pubs "
                               "WHERE {} "
 @@ -323,7 +325,7 @@ class PublicationDB(object):
        f.write(markdown)
        f.close()

-    def plot(self):
+    def plot(self, year_from, year_to):
        """Saves beautiful plot of the database."""
        for extension in ['png']:
            #plot.plot_by_year(self,
 @@ -333,6 +335,8 @@ class PublicationDB(object):
            
            plot.plot_by_year(self,
                              "apub-publication-rate-lofar.{}".format(extension),
+                              first_year = year_from,
+                              current_year = year_to,
                              instrument='LOFAR')
            '''
            plot.plot_by_year(self,
 @@ -349,7 +353,15 @@ class PublicationDB(object):
                              extrapolate=False)
            '''
            plot.plot_science_piechart(self,
-                                       "apub-piechart.{}".format(extension))
+                                       "apub-piechart.{}".format(extension),
+                                       first_year = year_from,
+                                       last_year = year_to)
+            
+            plot.plot_chord_diagram(self,
+                                    "apub-chord-diagram.{}".format(extension),
+                                    first_year = year_from,
+                                    last_year = year_to,
+                                    instrument='LOFAR')
            '''
            plot.plot_author_count(self,
                                   "apub-author-count.{}".format(extension),
 @@ -724,6 +736,69 @@ def display_abstract(article_dict):
        print('URL: http://adsabs.harvard.edu/abs/' + article_dict['bibcode'])
        print('')

def plot_citations(output_fn='apub-citation-rate-lofar.png',
                   first_year=2011,
                   last_year=2022,
                   barwidth=0.75,
                   dpi=200,
                   instrument='LOFAR',
                   colors=("#3498db", "#27ae60", "#95a5a6")):
    """Save a bar chart of the number of citing publications per year.

    The citing publications are read from the on-disk cache
    ``data/publication_references*.npy``; each cached article contributes
    one count to the year found in its ``_raw['year']`` field.

    Parameters
    ----------
    output_fn : str
        Path of the image file to write.
    first_year, last_year : int
        Years delimiting the x axis of the plot.
    barwidth : float
        Width of each bar, in units of years.
    dpi : int
        Resolution passed on to ``savefig``.
    instrument : str
        Label attached to the bars.
    colors : sequence of str
        Colour palette; only the first entry is used.

    Returns
    -------
    None
        The function only writes `output_fn`.  When no cache file exists an
        error is logged and nothing is written.
    """
    cache_files = sorted(
        glob.glob(os.path.expanduser("data/publication_references*.npy")))
    if not cache_files:
        log.error("No citation cache found "
                  "(data/publication_references*.npy); nothing to plot.")
        return

    # Should several caches exist, take the last one in sorted order so the
    # choice does not depend on the file system's listing order.
    # NOTE: allow_pickle=True unpickles arbitrary objects -- only ever load
    # cache files that were produced locally.
    with open(cache_files[-1], 'rb') as handle:
        citations = np.load(handle, allow_pickle=True)

    # Count the citing publications per year; the first hit of a year
    # counts as 1.
    counts = {}
    for citation in citations:
        year = int(citation._raw['year'])
        counts[year] = counts.get(year, 0) + 1
    years = sorted(counts)

    # Now make the actual plot
    fig = pl.figure()
    ax = fig.add_subplot(111)
    ax.bar(np.array(years),
           [counts[year] for year in years],
           label=instrument,
           facecolor=colors[0],
           width=barwidth)

    # Aesthetics
    pl.ylabel("Number of publications which cite LOFAR publications")
    ax.get_xaxis().get_major_formatter().set_useOffset(False)
    pl.xticks(range(first_year - 1, last_year + 1))
    pl.xlim([first_year - 0.75*barwidth, last_year + 0.75*barwidth])

    # Disable spines
    for side in ("left", "right", "top", "bottom"):
        ax.spines[side].set_visible(False)
    # Only show bottom and left ticks
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    # Only show horizontal grid lines
    ax.grid(axis='y')

    pl.tight_layout(rect=(0, 0, 1, 0.95), h_pad=1.5)
    log.info("Writing {}".format(output_fn))
    pl.savefig(output_fn, dpi=dpi)
    pl.close(fig)


 #########################
 @@ -820,9 +895,22 @@ def apub_plot(args=None):
                        type=str, default=DEFAULT_DB,
                        help="Location of the ASTRON publication list db. "
                             "Defaults to ~/.apub.db.")
+    parser.add_argument('year_from', nargs='?', type=int, default=None,
+                        help='Year to query from, e.g. 2011.')
+    parser.add_argument('year_to', nargs='?', type=int, default=None,
+                        help='Year to query to, e.g. 2022.')
    args = parser.parse_args(args)

-    PublicationDB(args.f).plot()
+    if args.year_from is None:
+        first_year = 2011
+    else:
+        first_year = args.year_from
+    if args.year_to is None:
+        last_year = datetime.datetime.now().year
+    else:
+        last_year = args.year_to
+    PublicationDB(args.f).plot(first_year, last_year)
+    plot_citations(first_year = first_year, last_year = last_year)


 def apub_update(args=None):
 @@ -923,6 +1011,91 @@ def apub_export(args=None):
    for row in cur.fetchall():
        print('{0},{1},{2},{3}'.format(row[0], row[1], row[2], row[3]))

def apub_citations(args=None):
    """Query ADS for publications citing LOFAR papers and cache them on disk.

    The accepted articles are written to
    ``data/publication_references_<month_start>_<month_end>.npy``; any
    existing ``data/publication_references*.npy`` file is removed first.
    When no article is accepted the existing cache is left untouched.

    Parameters
    ----------
    args : list of str, optional
        Command-line arguments (``[month_start [month_end]]``).  ``None``
        lets argparse read them from ``sys.argv``.

    Returns
    -------
    None
    """
    parser = argparse.ArgumentParser(
        description="Return publications which cite LOFAR publications.")
    # nargs='?' is required for the default to take effect: argparse treats
    # a positional without it as mandatory and ignores `default`.
    parser.add_argument('month_start', nargs='?', type=str, default='2011-01',
                        help='Start month, e.g. 2011-01.')
    parser.add_argument('month_end', nargs='?', default=None,
                        help='End month, e.g. 2015-06.')
    args = parser.parse_args(args)

    if ads is None:
        log.error("This action requires the ADS key to be setup.")
        return

    month_start = args.month_start
    month_end = args.month_end
    if month_end is None:
        month_end = datetime.datetime.now().strftime("%Y-%m")

    # Search for suitable publications
    log.info("Querying ADS...")
    database = "astronomy"
    # NOTE(review): rows is set far beyond any realistic result count,
    # presumably to fetch everything -- confirm how ADS caps this value.
    qry = ads.SearchQuery(q="""(
                                citations(title: "LOFAR" OR abstract: "LOFAR") 
                                AND pubdate:[{} TO {}]
                                )
                                database:"{}"
                            """.format(month_start, month_end, database),
                            fl=FIELDS,
                            rows=9999999999)

    # Build a new list rather than removing entries from the one being
    # iterated, so exactly the rejected articles are left out.
    accepted = [article for article in qry
                if _is_acceptable_citation(article)]
    if not accepted:
        log.warning("No citing publications accepted; "
                    "the existing cache is left untouched.")
        return

    # Replace the previous cache in one go, after the filtering is complete.
    for stale in glob.glob(
            os.path.expanduser("data/publication_references*.npy")):
        os.remove(stale)
    np.save(os.path.expanduser(
                "data/publication_references_{:}_{:}.npy".format(
                    month_start, month_end)),
            np.array(accepted))


def _is_acceptable_citation(article):
    """Return True when `article` should be kept in the citation cache."""
    # Ignore articles without abstract
    if not hasattr(article, 'abstract') or article.abstract is None:
        return False

    # Ignore all the unrefereed non-arxiv stuff
    try:
        if ("NOT REFEREED" in article.property
                and article.pub.lower() != "arxiv e-prints"):
            return False
    except (AttributeError, TypeError, ads.exceptions.APIResponseError):
        pass  # no .pub attribute or .property not iterable

    # Ignore proposals and cospar abstracts
    return ".prop." not in article.bibcode and "cosp.." not in article.bibcode
+

 def apub_spreadsheet(args=None):
    """Export the publication database to an Excel spreadsheet."""
 @@ -943,10 +1116,12 @@ def apub_spreadsheet(args=None):
                        help='Only show WSRT publications.')
    parser.add_argument('-a', '--apertif', action='store_true',
                        help='Only show APERTIF publications.')
+    parser.add_argument('-c', '--citations', action='store_true',
+                        help='Write out a separate ASTRON publication citations spreadsheet.')
    args = parser.parse_args(args)

    db = PublicationDB(args.f)
-    spreadsheet = []
+    spreadsheet, cit_spreadsheet = [], []
    if args.lofar and not args.wsrt:
        instrument = "LOFAR"
    elif args.wsrt and not args.lofar:
 @@ -1000,7 +1175,7 @@ def apub_spreadsheet(args=None):
            citations_per_year = metrics['citation_count'] / (publication_age.days / 365)
        except (TypeError, ZeroDivisionError):
            citations_per_year = 0
-        print("----- Pub ----")
+        #print("----- Pub ----")
        loc = []
        for aff in metrics['aff'][1:]:
            loc.append(aff.split(';')[0].split(',')[-1])
 @@ -1021,13 +1196,18 @@ def apub_spreadsheet(args=None):
            "US": 0,
            "AU": 0,
            "ES": 0,
+            "SA": 0,
            "Other": 0
        }
        for loc in unique_locations:
+            '''
+            print('\n')
            print("Location: ", loc)
            print("Occurence: ", unique_locations.get(loc))
            print(loc.split(' '))
+            '''
            locst = loc.strip(" ;,-")
+            #print("Location stripped:", locst)
            if locst == "the Netherlands":
                countries["NL"] += unique_locations.get(loc)
            elif locst == "The Netherlands":
 @@ -1062,10 +1242,14 @@ def apub_spreadsheet(args=None):
                countries["AU"] += unique_locations.get(loc)
            elif locst == "Spain":
                countries["ES"] += unique_locations.get(loc)
+            elif locst == "South Africa":
+                countries["SA"] += unique_locations.get(loc)
+            elif locst == "south africa":
+                countries["SA"] += unique_locations.get(loc)
            else:
                countries["Other"] += unique_locations.get(loc)                
-        print("---------------")
-        print('\n')
+        #print("---------------")
+        #print('\n')
        myrow = collections.OrderedDict([
                    ('bibcode', 'https://ui.adsabs.harvard.edu/abs/'+row[0]),
                    ('Year', row[1]),
 @@ -1100,14 +1284,27 @@ def apub_spreadsheet(args=None):
                    ("US", countries["US"]),
                    ("AU", countries["AU"]),
                    ("ES", countries["ES"]),
+                    ("SA", countries["SA"]),
                    ("Other", countries["Other"])
                    ])
                    #('affiliations', mod_aff)])
        spreadsheet.append(myrow)
    output_fn = 'astron-publications.xls'
-    print('Writing {}'.format(output_fn))
+    log.info('Writing {}'.format(output_fn))
    pd.DataFrame(spreadsheet).to_excel(output_fn, index=False, engine="xlsxwriter")
-
-
+    if args.citations:
+        # Add papers per year spreadsheet export for Grafana
+
+        for file in glob.glob(os.path.expanduser("data/publication_references*.npy")):
+            with open(file, 'rb') as c:
+                citations = np.load(c, allow_pickle=True)
+        for citation in citations:
+            citrow = collections.OrderedDict([
+                        ('bibcode', 'https://ui.adsabs.harvard.edu/abs/'+citation._raw['bibcode']),
+                        ('Year', citation._raw['year'])])
+            cit_spreadsheet.append(citrow)
+        output_fn = 'astron-cited.xls'
+        log.info('Writing {}'.format(output_fn))
+        pd.DataFrame(cit_spreadsheet).to_excel(output_fn, index=False, engine="xlsxwriter")
 if __name__ == '__main__':
    pass