#!/usr/bin/env python # coding: utf-8 # # Publications markdown generator for academicpages # # Takes a set of bibtex of publications and converts them for use with [academicpages.github.io](academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). # # The core python code is also in `pubsFromBibs.py`. # Run either from the `markdown_generator` folder after replacing updating the publist dictionary with: # * bib file names # * specific venue keys based on your bib file preferences # * any specific pre-text for specific files # * Collection Name (future feature) # # TODO: Make this work with other databases of citations, # TODO: Merge this with the existing TSV parsing solution from pybtex.database.input import bibtex import pybtex.database.input.bibtex from time import strptime import string import html import os import re #todo: incorporate different collection types rather than a catch all publications, requires other changes to template publist = { "proceeding": { "file" : "proceedings.bib", "venuekey": "booktitle", "venue-pretext": "In the proceedings of ", "collection" : {"name":"publications", "permalink":"/publication/"} }, "journal":{ "file": "pubs.bib", "venuekey" : "journal", "venue-pretext" : "", "collection" : {"name":"publications", "permalink":"/publication/"} } } html_escape_table = { "&": "&", '"': """, "'": "'" } def html_escape(text): """Produce entities within text.""" return "".join(html_escape_table.get(c,c) for c in text) for pubsource in publist: parser = bibtex.Parser() bibdata = parser.parse_file(publist[pubsource]["file"]) #loop through the individual references in a given bibtex file for bib_id in bibdata.entries: #reset default date pub_year = "1900" pub_month = "01" pub_day = "01" b = bibdata.entries[bib_id].fields try: pub_year = f'{b["year"]}' #todo: this hack for month and day needs some cleanup if "month" in b.keys(): if(len(b["month"])<3): pub_month = "0"+b["month"] pub_month = pub_month[-2:] elif(b["month"] not in range(12)): tmnth = strptime(b["month"][:3],'%b').tm_mon pub_month = "{:02d}".format(tmnth) else: pub_month = str(b["month"]) if "day" in b.keys(): pub_day = str(b["day"]) pub_date = pub_year+"-"+pub_month+"-"+pub_day #strip out {} as needed (some bibtex entries that maintain formatting) clean_title = b["title"].replace("{", "").replace("}","").replace("\\","").replace(" ","-") url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", clean_title) url_slug = url_slug.replace("--","-") md_filename = (str(pub_date) + "-" + url_slug + ".md").replace("--","-") html_filename = (str(pub_date) + "-" + url_slug).replace("--","-") #Build Citation from text citation = "" #citation authors - todo - add highlighting for primary author? for author in bibdata.entries[bib_id].persons["author"]: citation = citation+" "+author.first_names[0]+" "+author.last_names[0]+", " #citation title citation = citation + "\"" + html_escape(b["title"].replace("{", "").replace("}","").replace("\\","")) + ".\"" #add venue logic depending on citation type venue = publist[pubsource]["venue-pretext"]+b[publist[pubsource]["venuekey"]].replace("{", "").replace("}","").replace("\\","") citation = citation + " " + html_escape(venue) citation = citation + ", " + pub_year + "." ## YAML variables md = "---\ntitle: \"" + html_escape(b["title"].replace("{", "").replace("}","").replace("\\","")) + '"\n' md += """collection: """ + publist[pubsource]["collection"]["name"] md += """\npermalink: """ + publist[pubsource]["collection"]["permalink"] + html_filename note = False if "note" in b.keys(): if len(str(b["note"])) > 5: md += "\nexcerpt: '" + html_escape(b["note"]) + "'" note = True md += "\ndate: " + str(pub_date) md += "\nvenue: '" + html_escape(venue) + "'" url = False if "url" in b.keys(): if len(str(b["url"])) > 5: md += "\npaperurl: '" + b["url"] + "'" url = True md += "\ncitation: '" + html_escape(citation) + "'" md += "\n---" ## Markdown description for individual page if note: md += "\n" + html_escape(b["note"]) + "\n" if url: md += "\n[Access paper here](" + b["url"] + "){:target=\"_blank\"}\n" else: md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"} for full citation" md_filename = os.path.basename(md_filename) with open("../_publications/" + md_filename, 'w') as f: f.write(md) print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"") # field may not exist for a reference except KeyError as e: print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', b["title"][:30],"..."*(len(b['title'])>30),"\"") continue