The National Science Foundation has awarded a $1.6 million BIGDATA grant to Drexel University, in collaboration with the University of Washington, University of Michigan and University of Massachusetts Amherst, to research and develop responsible data science methods targeting the early stages of the data life cycle.
Julia Stoyanovich, Computer Science faculty at New York University, is the Principal Investigator (PI) for the grant, which provides funding from September 2017 through August 2021. Stoyanovich will work with Co-PIs Bill Howe, Director of Urbanalytics and Associate Professor in the Information School at UW; H. V. Jagadish, the Bernard A. Galler Collegiate Professor of Electrical Engineering and Computer Science at UM; and Gerome Miklau, Professor in the College of Information and Computer Sciences at UMA.
The project, Foundations of Responsible Data Management, develops techniques and practices to reduce the introduction of algorithmic bias and privacy leaks, while supporting transparency, in the pre-processing of big data. In contrast to existing work on these topics, which has focused on data mining, modeling, analysis and machine learning, this project addresses upstream processes that generate input data for analysis, from discovery and acquisition to querying, ranking and the generation of synthetic data. This work is part of Data, Responsibly, based at Drexel University, which builds tools to embed legal and ethical norms into data sharing, collection and analysis.
% Journal article introducing Wide-Open. The coined name is brace-protected so
% sentence-casing styles keep its capitals; pages holds the electronic article id.
@article{grechkin2017wide,
  author    = {Grechkin, Maxim and Poon, Hoifung and Howe, Bill},
  title     = {{Wide-Open}: Accelerating Public Data Release by Automating Detection of Overdue Datasets},
  journal   = {PLOS Biology},
  volume    = {15},
  number    = {6},
  pages     = {e2002477},
  year      = {2017},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pbio.2002477}
}
% Fides paper, SSDBM 2017. The same work is also recorded in this file under the
% key stoyanovich:17; this key is kept so existing citations continue to resolve.
% Pages, publisher and DOI are aligned with that fuller record.
@inproceedings{stoyanovich2017fides,
  author    = {Stoyanovich, Julia and Howe, Bill and Abiteboul, Serge and Miklau, Gerome and Sahuguet, Arnaud and Weikum, Gerhard},
  title     = {Fides: Towards a Platform for Responsible Data Science},
  booktitle = {Proceedings of the 29th International Conference on Scientific and Statistical Database Management},
  pages     = {26:1--26:6},
  year      = {2017},
  publisher = {ACM},
  doi       = {10.1145/3085504.3085530}
}
% Conference paper on balancing privacy and accountability in data sharing.
% The title previously repeated its "Beyond Open vs. Closed:" prefix twice.
% NOTE(review): year is carried over unchanged; confirm it against the published proceedings.
@inproceedings{young:18,
  author    = {Young, Meg and Rodriguez, Luke and Keller, Emily and Sun, Feiyang and Sa, Boyang and Whittington, Jan and Howe, Bill},
  title     = {Beyond Open vs. Closed: Balancing Individual Privacy and Public Accountability in Data Sharing},
  booktitle = {{ACM} Conference on Fairness, Accountability, and Transparency ({FAT*})},
  year      = {2018}
}
% DataSynthesizer paper, SSDBM 2017. The camelCase tool name is brace-protected so
% sentence-casing styles do not flatten it to "Datasynthesizer".
% address is the publisher's city; location is the conference venue.
@inproceedings{ping17datasynthesizer,
  author    = {Ping, Haoyue and Stoyanovich, Julia and Howe, Bill},
  title     = {{DataSynthesizer}: Privacy-Preserving Synthetic Datasets},
  booktitle = {Proceedings of the 29th International Conference on Scientific and Statistical Database Management},
  series    = {SSDBM '17},
  year      = {2017},
  isbn      = {978-1-4503-5282-6},
  location  = {Chicago, IL, USA},
  pages     = {42:1--42:5},
  articleno = {42},
  numpages  = {5},
  url       = {https://doi.org/10.1145/3085504.3091117},
  doi       = {10.1145/3085504.3091117},
  acmid     = {3091117},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Data Sharing, Differential Privacy, Synthetic Data}
}
% arXiv preprint. The journal field is kept as-is so classic styles still print the
% arXiv identifier; eprint/archiveprefix expose the same identifier in
% machine-readable form for eprint-aware styles and tools.
@article{howe2017synthetic,
  author        = {Howe, Bill and Stoyanovich, Julia and Ping, Haoyue and Herman, Bernease and Gee, Matt},
  title         = {Synthetic Data for Social Good},
  journal       = {arXiv preprint arXiv:1710.08874},
  year          = {2017},
  eprint        = {1710.08874},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1710.08874}
}
% Full publisher-style record of the Fides paper (SSDBM '17), one field per line.
% The same work also appears in this file under the key stoyanovich2017fides.
@inproceedings{stoyanovich:17,
  title     = {Fides: Towards a Platform for Responsible Data Science},
  author    = {Stoyanovich, Julia and Howe, Bill and Abiteboul, Serge and Miklau, Gerome and Sahuguet, Arnaud and Weikum, Gerhard},
  booktitle = {Proceedings of the 29th International Conference on Scientific and Statistical Database Management},
  series    = {SSDBM '17},
  location  = {Chicago, IL, USA},
  year      = {2017},
  pages     = {26:1--26:6},
  articleno = {26},
  numpages  = {6},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-5282-6},
  doi       = {10.1145/3085504.3085530},
  url       = {http://doi.acm.org/10.1145/3085504.3085530},
  acmid     = {3085530},
  keywords  = {Accountability, Data, Data Ethics, Data Science for Social Good, Fairness, Responsibly, Transparency}
}