Sonia Haiduc

Sonia Haiduc

Florida State University

  • Home
  • Publications
  • Research
  • Students
  • Teaching
  • Service

Publications

2018

  • M. Alahmadi, J. Hassel, P. Biswas, S. Haiduc, and P. Kumar, “Accurately Predicting the Location of Code Fragments in Programming Video Tutorials Using Deep Learning,” in Proceedings of the 14th International Conference on Predictive Models and Data Analytics in Software Engineering (PROMISE’18), Technical Research Track, Oulu, Finland, 2018, to appear (12 pages). Acceptance Rate: 21.2%.
    [Bibtex]
    @InProceedings{Alahmadi2018,
      author         = {Alahmadi, Mohammad and Hassel, Jonathan and Biswas,
        Parajuli and Haiduc, Sonia and Kumar, Piyush},
      title          = {{Accurately} {Predicting} the {Location} of {Code}
        {Fragments} in {Programming} {Video} {Tutorials} {Using} {Deep} {Learning}},
      booktitle      = {Proceedings of the 14th {International} {Conference}
        on {Predictive} {Models} and {Data} {Analytics} in {Software}
        {Engineering} ({PROMISE}'18), {Technical} {Research} {Track}},
      year           = {2018},
      address        = {Oulu, Finland},
      month          = oct,
      publisher      = {Springer},
      note           = {To appear (12 pages)},
      acceptancerate = {21.2},
      timestamp      = {2018-07-10},
    }
  • [PDF] C. Mills, J. Escobar-Avila, and S. Haiduc, “Automatic Traceability Maintenance via Machine Learning Classification,” in Proceedings of the 34th IEEE International Conference on Software Maintenance and Evolution (ICSME’18), Technical Research Track, Madrid, Spain, 2018, to appear (12 pages). Acceptance Rate: 21.2%.
    [Bibtex]
    @InProceedings{Mills2018a,
      author         = {Mills, Chris and Escobar-Avila, Javier and Haiduc, Sonia},
      title          = {{Automatic} {Traceability} {Maintenance} via
        {Machine} {Learning} {Classification}},
      booktitle      = {Proceedings of the 34th {IEEE} {International}
        {Conference} on {Software} {Maintenance} and {Evolution} ({ICSME}'18),
        {Technical} {Research} {Track}},
      year           = {2018},
      address        = {Madrid, Spain},
      month          = sep,
      publisher      = {ACM},
      note           = {To appear (12 pages)},
      acceptancerate = {21.2},
      pdf            = {http://www.cs.fsu.edu/~serene/mills_icsme_18_trace/},
      timestamp      = {2018-07-10},
    }
  • [PDF] C. Mills, J. Pantiuchina, E. Parra, G. Bavota, and S. Haiduc, “Are Bug Reports Enough for Text Retrieval-based Bug Localization?,” in Proceedings of the 34th IEEE International Conference on Software Maintenance and Evolution (ICSME’18), Technical Research Track, Madrid, Spain, 2018, to appear (12 pages). Acceptance Rate: 21.2%.
    [Bibtex]
    @InProceedings{Mills2018b,
      author         = {Mills, Chris and Pantiuchina, Jevgenija and Parra,
        Esteban and Bavota, Gabriele and Haiduc, Sonia},
      title          = {{Are} {Bug} {Reports} {Enough} for {Text}
        {Retrieval-based} {Bug} {Localization?}},
      booktitle      = {Proceedings of the 34th {IEEE} {International}
        {Conference} on {Software} {Maintenance} and {Evolution} ({ICSME}'18),
        {Technical} {Research} {Track}},
      year           = {2018},
      address        = {Madrid, Spain},
      month          = sep,
      publisher      = {ACM},
      note           = {To appear (12 pages)},
      acceptancerate = {21.2},
      pdf            = {http://www.cs.fsu.edu/~serene/mills_icsme_18_bugs/},
      timestamp      = {2018-07-10},
    }
  • [PDF] [DOI] E. Parra, J. Escobar-Avila, and S. Haiduc, “Automatic Tagging for Software Engineering Videos,” in Proceedings of the 26th ACM/IEEE International Conference on Program Comprehension (ICPC’18), Technical Research Track, Gothenburg, Sweden, 2018, pp. 222-232. Acceptance Rate: 37.6%.
    [Bibtex]
    @InProceedings{Parra2018,
      author         = {Parra, Esteban and Escobar-Avila, Javier and Haiduc, Sonia},
      title          = {Automatic {Tagging} for {Software} {Engineering}
        {Videos}},
      booktitle      = {Proceedings of the 26th {ACM}/{IEEE} {International}
        {Conference} on {Program} {Comprehension} ({ICPC}'18), {Technical}
        {Research} {Track}},
      year           = {2018},
      pages          = {222--232},
      address        = {Gothenburg, Sweden},
      month          = may,
      eventdate      = {2018-05-27/2018-05-28},
      publisher      = {ACM},
      acceptancerate = {37.6},
      doi            = {10.1145/3196321.3196351},
      keywords       = {Software engineering; video tutorials; tagging; text
        retrieval},
      pdf            = {http://www.cs.fsu.edu/~serene/parra-icpc2018/},
      timestamp      = {2018-08-12},
    }
  • L. Ponzanelli, G. Bavota, A. Mocci, R. Oliveto, M. Di Penta, S. Haiduc, B. Russo, and M. Lanza, “Automatic Identification and Classification of Software Development Video Tutorial Fragments,” IEEE Transactions on Software Engineering (TSE), 2018, accepted for publication.
    [Bibtex]
    @Article{Ponzanelli2018c,
      author  = {Ponzanelli, L. and Bavota, Gabriele and Mocci, A. and
        Oliveto, Rocco and Di Penta, Massimiliano and Haiduc, Sonia and
        Russo, Barbara and Lanza, M.},
      title   = {{Automatic} {Identification} and {Classification} of
        {Software} {Development} {Video} {Tutorial} {Fragments}},
      journal = {{IEEE} {Transactions} on {Software} {Engineering} ({TSE})},
      year    = {2018},
      note    = {Accepted for publication},
    }

2017

  • [DOI] J. Escobar-Avila, E. Parra, and S. Haiduc, “Text Retrieval-based Tagging of Software Engineering Video Tutorials,” in Proceedings of the 39th ACM/IEEE International Conference on Software Engineering (ICSE’17), Poster Track, Buenos Aires, Argentina, 2017, pp. 341-343. Acceptance Rate: 57.8%.
    [Bibtex]
    @InProceedings{Escobar-Avila2017,
      author         = {Escobar-Avila, Javier and Parra, Esteban and Haiduc, Sonia},
      title          = {Text {Retrieval-based} {Tagging} of {Software}
        {Engineering} {Video} {Tutorials}},
      booktitle      = {Proceedings of the 39th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'17), {Poster} {Track}},
      year           = {2017},
      pages          = {341--343},
      address        = {Buenos Aires, Argentina},
      month          = may,
      eventdate      = {2017-05-20/2017-05-28},
      publisher      = {ACM},
      abstract       = {Video tutorials are an emerging form of
        documentation in software engineering and can efficiently provide
        developers with useful information needed for their daily tasks.
        However, to get the information they need, developers have to find the
        right tutorial for their task at hand. Currently, there is little
        information available to quickly judge whether a tutorial
        is relevant to a topic or helpful for task at hand, which can
        lead to missing the best tutorials and wasting time watching
        irrelevant ones. We present the first efforts towards new tagging
        approaches using text retrieval that describe the contents of
        software engineering video tutorials, making it easier and faster
        to understand their purpose and contents. We also present the
        results of a preliminary evaluation of thirteen such approaches,
        revealing the potential of some and limitations of others. Our
        future work will focus on improving on the promising approaches
        determined in this preliminary study and supplementing them
        with additional information.},
      acceptancerate = {57.8},
      doi            = {10.1109/ICSE-C.2017.121},
      keywords       = {Software engineering; video tutorials; tagging; text
        retrieval},
      timestamp      = {2017.02.12},
    }
  • [DOI] C. Mills and S. Haiduc, “A Machine Learning Approach for Determining the Validity of Traceability Links,” in Proceedings of the 39th ACM/IEEE International Conference on Software Engineering (ICSE’17), Poster Track, Buenos Aires, Argentina, 2017, pp. 121-123.
    [Bibtex]
    @InProceedings{Mills2017a,
      author    = {Mills, Chris and Haiduc, Sonia},
      title     = {A {Machine} {Learning} {Approach} for {Determining} the
        {Validity} of {Traceability} {Links}},
      booktitle = {Proceedings of the 39th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'17), {Poster} {Track}},
      year      = {2017},
      pages     = {121--123},
      address   = {Buenos Aires, Argentina},
      month     = may,
      eventdate = {2017-05-20/2017-05-28},
      publisher = {ACM},
      doi       = {10.1109/ICSE-C.2017.86},
      timestamp = {2017.02.12},
    }
  • [DOI] C. Mills and S. Haiduc, “The Impact of Retrieval Direction on IR-based Traceability Link Recovery,” in Proceedings of the 39th ACM/IEEE International Conference on Software Engineering (ICSE’17), NIER Track, Buenos Aires, Argentina, 2017, pp. 51-54. Acceptance Rate: 16%.
    [Bibtex]
    @InProceedings{Mills2017b,
      author         = {Mills, Chris and Haiduc, Sonia},
      title          = {The {Impact} of {Retrieval} {Direction} on
        {IR}-based {Traceability} {Link} {Recovery}},
      booktitle      = {Proceedings of the 39th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'17), {NIER} {Track}},
      year           = {2017},
      pages          = {51--54},
      address        = {Buenos Aires, Argentina},
      month          = may,
      eventdate      = {2017-05-20/2017-05-28},
      publisher      = {ACM},
      acceptancerate = {16},
      doi            = {10.1109/ICSE-NIER.2017.14},
      timestamp      = {2017.02.12},
    }
  • [DOI] C. Mills, G. Bavota, S. Haiduc, R. Oliveto, A. Marcus, and A. De Lucia, “Predicting Query Quality for Applications of Text Retrieval to Software Engineering Tasks,” ACM Transactions on Software Engineering and Methodology (TOSEM), vol. 26, iss. 1, pp. 1-45, 2017.
    [Bibtex]
    @Article{Mills2017e,
      author    = {Mills, Chris and Bavota, Gabriele and Haiduc, Sonia and
        Oliveto, Rocco and Marcus, Andrian and De Lucia, Andrea},
      title     = {Predicting {Query} {Quality} for {Applications} of {Text}
        {Retrieval} to {Software} {Engineering} {Tasks}},
      journal   = {{ACM} {Transactions} on {Software} {Engineering} and
        {Methodology} ({TOSEM})},
      year      = {2017},
      volume    = {26},
      number    = {1},
      pages     = {1--45},
      month     = jul,
      doi       = {10.1145/3078841},
      timestamp = {Mon, 31 Jul 2017 13:11:39 +0200},
    }

2016

  • [DOI] S. Haiduc, V. Arnaoudova, A. Marcus, and G. Antoniol, “The Use of Text Retrieval and Natural Language Processing in Software Engineering,” in Proceedings of the 38th ACM/IEEE International Conference on Software Engineering (ICSE’16), Technical Briefings, Austin, TX, USA, 2016, pp. 898-899. Acceptance Rate: 41.9%.
    [Bibtex]
    @InProceedings{Haiduc2016,
      author         = {Haiduc, Sonia and Arnaoudova, Venera and Marcus,
        Andrian and Antoniol, G.},
      title          = {The {Use} of {Text} {Retrieval} and {Natural}
        {Language} {Processing} in {Software} {Engineering}},
      booktitle      = {Proceedings of the 38th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'16), {Technical}
        {Briefings}},
      year           = {2016},
      pages          = {898--899},
      address        = {Austin, TX, USA},
      publisher      = {ACM},
      abstract       = {This technical briefing presents the state of the
        art Text Retrieval and Natural Language Processing techniques used in
        Software Engineering and discusses their applications in the field.},
      acceptancerate = {41.9},
      doi            = {10.1145/2889160.2891053},
      keywords       = {information retrieval, Software engineering},
      timestamp      = {2016.05.05},
    }
  • [DOI] S. Imminni, M. Hasan, M. Duckett, P. Sachdeva, S. Karmakar, P. Kumar, and S. Haiduc, “SPYSE: A Semantic Search Engine for Python Packages and Modules,” in Proceedings of the 38th ACM/IEEE International Conference on Software Engineering (ICSE’16), Tool Demo Track, Austin, TX, USA, 2016, pp. 625-628. Acceptance Rate: 31%.
    [Bibtex]
    @InProceedings{Imminni2016,
      author         = {Imminni, S. and Hasan, M. and Duckett, M. and
        Sachdeva, P. and Karmakar, S. and Kumar, P. and Haiduc, Sonia},
      title          = {{SPYSE}: {A} {Semantic} {Search} {Engine} for {Python}
        {Packages} and {Modules}},
      booktitle      = {Proceedings of the 38th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'16), {Tool} {Demo}
        {Track}},
      year           = {2016},
      pages          = {625--628},
      address        = {Austin, TX, USA},
      publisher      = {ACM},
      abstract       = {Code reuse is a common practice among software
        developers, whether novices or experts. Developers often rely on online
        resources in order to find code to reuse. For Python, the Python Package
        Index (PyPI) contains all packages developed for the community and is
        the largest catalog of reusable, open source packages developers can
        consult. While a valuable resource, the state of the art PyPI search has
        very limited capabilities, making it hard for developers to find useful,
        high quality Python code to use for their task at hand.
        We introduce SPYSE (Semantic PYthon Search Engine), a web-based search
        engine that overcomes the limitations of the state of the art, making it
        easier for developers to find useful code. The power of SPYSE lays in
        the combination of three different aspects meant to provide developers
        with relevant, and at the same time high quality code: code semantics,
        popularity, and code quality. SPYSE also allows searching for modules,
        in addition to packages, which opens new reuse opportunities for
        developers, currently not supported. TOOL URL: https://pypi.compgeom.com
        VIDEO URL: https://youtu.be/Praglw-vS50},
      acceptancerate = {31},
      doi            = {10.1145/2889160.2889174},
      timestamp      = {2016.05.05},
    }
  • [DOI] E. Parra, S. Haiduc, and R. James, “Making a Difference: An Overview of Humanitarian Free Open Source Systems,” in Proceedings of the 38th ACM/IEEE International Conference on Software Engineering (ICSE’16), Poster Track, Austin, TX, USA, 2016, pp. 731-733. Acceptance Rate: 57.8%.
    [Bibtex]
    @InProceedings{Parra2016,
      author         = {Parra, Esteban and Haiduc, Sonia and James, Rebecca},
      title          = {Making a {Difference}: {An} {Overview} of
        {Humanitarian} {Free} {Open} {Source} {Systems}},
      booktitle      = {Proceedings of the 38th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'16), {Poster} {Track}},
      year           = {2016},
      pages          = {731--733},
      address        = {Austin, TX, USA},
      publisher      = {ACM},
      abstract       = {Humanitarian Free Open Source Software (HFOSS)
        serves philanthropic goals that usually benefit non-profit organizations
        meant to improve the human condition. The altruistic goals these systems
        serve can offer developers additional motivation for contributing to OSS
        and have been seen as a way to attract more women to computing majors
        and to improve students' learning.
        We present an exploratory study of the currently existing HFOSS
        projects, aimed at giving an overview of their properties, including the
        most common application domains and the most popular programming
        languages used in this kind of systems. We also investigated the
        assumption that HFOSS systems attract more women developers and found
        the assumption to be incorrect.},
      acceptancerate = {57.8},
      doi            = {10.1145/2889160.2892651},
      timestamp      = {2016.05.05},
    }
  • [DOI] L. Ponzanelli, G. Bavota, A. Mocci, M. Di Penta, R. Oliveto, M. Hasan, B. Russo, S. Haiduc, and M. Lanza, “Too Long; Didn’t Watch! Extracting Relevant Fragments from Software Development Video Tutorials,” in Proceedings of the 38th ACM/IEEE International Conference on Software Engineering (ICSE’16), Technical Research Track, Austin, TX, USA, 2016, pp. 261-272. Acceptance Rate: 19%.
    [Bibtex]
    @InProceedings{Ponzanelli2016b,
      author         = {Ponzanelli, L. and Bavota, Gabriele and Mocci, A. and
        Di Penta, Massimiliano and Oliveto, Rocco and Hasan, M. and
        Russo, Barbara and Haiduc, Sonia and Lanza, M.},
      title          = {Too {Long}; {Didn't} {Watch}! {Extracting}
        {Relevant} {Fragments} from {Software} {Development} {Video}
        {Tutorials}},
      booktitle      = {Proceedings of the 38th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'16), {Technical}
        {Research} {Track}},
      year           = {2016},
      pages          = {261--272},
      address        = {Austin, TX, USA},
      publisher      = {ACM},
      abstract       = {When knowledgeable colleagues are not available,
        developers resort to offline and online resources, e.g., tutorials,
        mailing lists, and Q\&A websites. These, however, need to be found, read,
        and understood, which takes its toll in terms of time and mental energy.
        A more immediate and accessible resource are video tutorials found on
        the web, which in recent years have seen a steep increase in popularity.
        Nonetheless, videos are an intrinsically noisy data source, and finding
        the right piece of information might be even more cumbersome than using
        the previously mentioned resources.
        We present CodeTube, an approach which mines video tutorials found on
        the web, and enables developers to query their contents. The video
        tutorials are split into coherent fragments, to return only fragments
        related to the query. These are complemented with information from
        additional sources, such as Stack Overflow discussions. The results of
        two studies to assess CodeTube indicate that video tutorials---if
        appropriately processed---represent a useful, yet still under-utilized
        source of information for software development.},
      acceptancerate = {19},
      doi            = {10.1145/2884781.2884824},
      timestamp      = {2016.05.05},
    }
  • [DOI] L. Ponzanelli, G. Bavota, A. Mocci, M. Di Penta, R. Oliveto, B. Russo, S. Haiduc, and M. Lanza, “CodeTube: Extracting Relevant Fragments from Software Development Video Tutorials,” in Proceedings of the 38th ACM/IEEE International Conference on Software Engineering (ICSE’16), Tool Demo Track, Austin, TX, USA, 2016, pp. 645-648. Acceptance Rate: 31%.
    [Bibtex]
    @InProceedings{Ponzanelli2016,
      author         = {Ponzanelli, L. and Bavota, Gabriele and Mocci, A. and
        Di Penta, Massimiliano and Oliveto, Rocco and Russo, Barbara and
        Haiduc, Sonia and Lanza, M.},
      title          = {{CodeTube}: {Extracting} {Relevant} {Fragments} from
        {Software} {Development} {Video} {Tutorials}},
      booktitle      = {Proceedings of the 38th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'16), {Tool} {Demo}
        {Track}},
      year           = {2016},
      pages          = {645--648},
      address        = {Austin, TX, USA},
      publisher      = {ACM},
      abstract       = {Nowadays developers heavily rely on sources of
        informal documentation, including Q\&A forums, slides, or video
        tutorials, the latter being particularly useful to provide introductory
        notions for a piece of technology. The current practice is that
        developers have to browse sources individually, which in the case of
        video tutorials is cumbersome, as they are lengthy and cannot be
        searched based on their contents.
        We present CodeTube, a Web-based recommender system that analyzes the
        contents of video tutorials and is able to provide, given a query,
        cohesive and self-contained video fragments, along with links to
        relevant Stack Overflow discussions. CodeTube relies on a combination of
        textual analysis and image processing applied on video tutorial frames
        and speech transcripts to split videos into cohesive fragments, index
        them and identify related Stack Overflow discussions.
        DEMO URL: http://codetube.inf.usi.ch
        VIDEO URL: https://youtu.be/yUsUG3g87Dg},
      acceptancerate = {31},
      doi            = {10.1145/2889160.2889172},
      timestamp      = {2016.05.05},
    }

2015

  • V. Arnaoudova, S. Haiduc, A. Marcus, and G. Antoniol, “The Use of Text Retrieval and Natural Language Processing in Software Engineering,” in ACM Conference on Systems, Programming, Languages and Applications: Software for Humanity (SPLASH’15), Tutorials, Pittsburgh, PA, USA, 2015.
    [Bibtex]
    @InProceedings{Arnaoudova2015,
      author    = {Arnaoudova, Venera and Haiduc, Sonia and Marcus, Andrian
        and Antoniol, G.},
      title     = {The {Use} of {Text} {Retrieval} and {Natural} {Language}
        {Processing} in {Software} {Engineering}},
      booktitle = {{ACM} {Conference} on {Systems}, {Programming},
        {Languages} and {Applications}: {Software} for {Humanity} ({SPLASH}'15),
        {Tutorials}},
      year      = {2015},
      address   = {Pittsburgh, PA, USA},
      month     = oct,
      keywords  = {information retrieval, Software engineering},
      owner     = {USER},
      timestamp = {2015.09.18},
    }
  • V. Arnaoudova, S. Haiduc, A. Marcus, and G. Antoniol, “The Use of Text Retrieval and Natural Language Processing in Software Engineering,” in Proceedings of the 10th Joint Meeting of the European Software Engineering Conference and the ACM/SIGSOFT Symposium on the Foundations of Software Engineering (ESEC/FSE’15), Tutorials, Bergamo, Italy, 2015.
    [Bibtex]
    @InProceedings{Arnaoudova2015a,
      author    = {Arnaoudova, Venera and Haiduc, Sonia and Marcus, Andrian
        and Antoniol, G.},
      title     = {The {Use} of {Text} {Retrieval} and {Natural} {Language}
        {Processing} in {Software} {Engineering}},
      booktitle = {Proceedings of the 10th {Joint} {Meeting} of the
        {European} {Software} {Engineering} {Conference} and the {ACM/SIGSOFT}
        {Symposium} on the {Foundations} of {Software} {Engineering}
        ({ESEC/FSE}'15), {Tutorials}},
      year      = {2015},
      address   = {Bergamo, Italy},
      month     = oct,
      keywords  = {information retrieval, Software engineering},
      owner     = {USER},
      timestamp = {2015.09.18},
    }
  • [DOI] V. Arnaoudova, S. Haiduc, A. Marcus, and G. Antoniol, “The Use of Text Retrieval and Natural Language Processing in Software Engineering,” in Proceedings of the 37th ACM/IEEE International Conference on Software Engineering (ICSE’15), Technical Briefings, Florence, Italy, 2015, pp. 949-950. Acceptance Rate: 40%.
    [Bibtex]
    @InProceedings{Arnaoudova2015b,
      author         = {Arnaoudova, Venera and Haiduc, Sonia and Marcus,
        Andrian and Antoniol, G.},
      title          = {The {Use} of {Text} {Retrieval} and {Natural}
        {Language} {Processing} in {Software} {Engineering}},
      booktitle      = {Proceedings of the 37th {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'15), {Technical}
        {Briefings}},
      year           = {2015},
      pages          = {949--950},
      address        = {Florence, Italy},
      month          = may,
      acceptancerate = {40},
      doi            = {10.1109/ICSE.2015.301},
      keywords       = {information retrieval, Software engineering},
      timestamp      = {2015.09.18},
    }
  • [DOI] J. Escobar-Avila, M. Linares-Vasquez, and S. Haiduc, “Unsupervised Software Categorization Using Bytecode,” in Proceedings of the 23rd IEEE International Conference on Program Comprehension (ICPC), Technical Research Track, Florence, Italy, 2015, pp. 229-239. Acceptance Rate: 31.5%.
    [Bibtex]
    @InProceedings{Escobar-Avila2015b,
      author         = {Escobar-Avila, Javier and Linares-Vasquez, Mario and
        Haiduc, Sonia},
      title          = {Unsupervised {Software} {Categorization} {Using}
        {Bytecode}},
      booktitle      = {Proceedings of the 23rd {IEEE} {International}
        {Conference} on {Program} {Comprehension} ({ICPC}), {Technical}
        {Research} {Track}},
      year           = {2015},
      pages          = {229--239},
      address        = {Florence, Italy},
      month          = may,
      acceptancerate = {31.5},
      doi            = {10.1109/ICPC.2015.33},
      keywords       = {Accuracy, bytecode, clustering, Clustering
        algorithms, Data mining, dirichlet process, Java, Software, software
        categorization, Software libraries, software profiles},
      timestamp      = {2016.08.31},
    }
  • S. Haiduc and G. Bavota, “5th Workshop on Mining Unstructured Data,” in Proceedings of the 31st International Conference on Software Maintenance and Evolution (ICSME’15), Bremen, Germany, 2015, pp. 1-6.
    [Bibtex]
    @InProceedings{MUD2015,
      author    = {Haiduc, Sonia and Bavota, Gabriele},
      title     = {5th {Workshop} on {Mining} {Unstructured} {Data}},
      booktitle = {Proceedings of the 31st {International} {Conference} on
        {Software} {Maintenance} and {Evolution} ({ICSME}'15)},
      year      = {2015},
      pages     = {1--6},
      address   = {Bremen, Germany},
    }
  • [DOI] L. Moreno, G. Bavota, S. Haiduc, M. Di Penta, R. Oliveto, B. Russo, and A. Marcus, “Query-based Configuration of Text Retrieval Solutions for Software Engineering Tasks,” in Proceedings of the 10th Joint Meeting of the European Software Engineering Conference and the ACM/SIGSOFT Symposium on the Foundations of Software Engineering (ESEC/FSE’15), Technical Research Track, Bergamo, Italy, 2015, pp. 567-578. Acceptance Rate: 25.4%.
    [Bibtex]
    @InProceedings{Moreno2015,
      author         = {Moreno, Laura and Bavota, Gabriele and Haiduc, Sonia
        and Di Penta, Massimiliano and Oliveto, Rocco and Russo, Barbara and
        Marcus, Andrian},
      title          = {Query-based {Configuration} of {Text} {Retrieval}
        {Solutions} for {Software} {Engineering} {Tasks}},
      booktitle      = {Proceedings of the 10th {Joint} {Meeting} of the
        {European} {Software} {Engineering} {Conference} and the {ACM/SIGSOFT}
        {Symposium} on the {Foundations} of {Software} {Engineering}
        ({ESEC/FSE}'15), {Technical} {Research} {Track}},
      year           = {2015},
      series         = {{ESEC}/{FSE} 2015},
      pages          = {567--578},
      address        = {Bergamo, Italy},
      publisher      = {ACM},
      abstract       = {Text Retrieval (TR) approaches have been used to
        leverage the textual information contained in software artifacts to
        address a multitude of software engineering (SE) tasks. However, TR
        approaches need to be configured properly in order to lead to good
        results. Current approaches for automatic TR configuration in SE
        configure a single TR approach and then use it for all possible queries.
        In this paper, we show that such a configuration strategy leads to
        suboptimal results, and propose QUEST, the first approach bringing TR
        configuration selection to the query level. QUEST recommends the best TR
        configuration for a given query, based on a supervised learning approach
        that determines the TR configuration that performs the best for each
        query according to its properties. We evaluated QUEST in the context of
        feature and bug localization, using a data set with more than 1,000
        queries. We found that QUEST is able to recommend one of the top three
        TR configurations for a query with a 69\% accuracy, on average. We
        compared the results obtained with the configurations recommended by
        QUEST for every query with those obtained using a single TR
        configuration for all queries in a system and in the entire data set. We
        found that using QUEST we obtain better results than with any of the
        considered TR configurations.},
      acceptancerate = {25.4},
      doi            = {10.1145/2786805.2786859},
      isbn           = {978-1-4503-3675-8},
      keywords       = {configuration, Feature and Bug Localization,
        Text-Retrieval in Software Engineering},
      timestamp      = {2016.08.31},
      urldate        = {2015-09-18},
    }

2014

  • A. Bacchelli and S. Haiduc, “4th Workshop on Mining Unstructured Data,” in Proceedings of the 30th International Conference on Software Maintenance and Evolution (ICSME’14), Victoria, BC, Canada, 2014.
    [Bibtex]
    @InProceedings{MUD2014,
      author    = {Bacchelli, A. and Haiduc, Sonia},
      title     = {4th {Workshop} on {Mining} {Unstructured} {Data}},
      booktitle = {Proceedings of the 30th {International} {Conference} on
        {Software} {Maintenance} and {Evolution} ({ICSME}'14)},
      year      = {2014},
      address   = {Victoria, BC, Canada},
    }
  • [DOI] S. Haiduc, “Supporting Query Formulation for Text Retrieval Applications in Software Engineering,” in Proceedings of the 30th IEEE International Conference on Software Maintenance and Evolution (ICSME’14), Doctoral Symposium, Victoria, Canada, 2014, pp. 657-662.
    [Bibtex]
    @InProceedings{Haiduc2014,
      author    = {Haiduc, Sonia},
      title     = {Supporting {Query} {Formulation} for {Text} {Retrieval}
        {Applications} in {Software} {Engineering}},
      booktitle = {Proceedings of the 30th {IEEE} {International}
        {Conference} on {Software} {Maintenance} and {Evolution} ({ICSME}'14),
        {Doctoral} {Symposium}},
      year      = {2014},
      pages     = {657--662},
      address   = {Victoria, BC, Canada},
      month     = sep,
      abstract  = {Text Retrieval (TR) techniques have been successfully
        used to leverage the textual information found in software artifacts
        with the purpose of aiding developers with their daily tasks. TR
        techniques require a query as input and the usefulness of the results
        they retrieve depends greatly on this query. While some queries retrieve
        relevant information for the current task, others do not, therefore
        pointing developers in the wrong direction. Developers have a hard time
        realizing this before going through the search results, which, in the
        case of ``bad'' queries means time and effort lost looking at irrelevant
        information. In this scenario, developers have to reformulate the query,
        often without pointers on how to improve it. The work presented in this
        paper introduces novel approaches to address these challenges and makes
        two main contributions: 1) defines the first approach for predicting the
        success of a TR query in the context of SE tasks, 2) introduces
        automatic approaches that analyze a query and improve it by finding the
        most suited reformulation for it. The approaches were evaluated for the
        task of concept location in source code and the results of the performed
        studies reveal their usefulness.},
      doi       = {10.1109/ICSME.2014.117},
      keywords  = {Conferences, query, query formulation, Query Quality,
        Query Reformulation, relevant information retrieval, software artifacts,
        Software engineering, software maintenance, source code, text analysis,
        text retrieval, text retrieval applications, textual information, TR
        query prediction, TR techniques},
      timestamp = {2016.08.31},
    }

2013

  • A. Bacchelli, N. Bettenburg, L. Guerrouj, and S. Haiduc, “3rd Workshop on Mining Unstructured Data,” in 20th Working Conference on Reverse Engineering (WCRE’13), Koblenz, Germany, 2013, pp. 491-492.
    [Bibtex]
    @InProceedings{wcre2013,
      author    = {Bacchelli, A. and Bettenburg, N. and Guerrouj, L. and
        Haiduc, Sonia},
      title     = {3rd {Workshop} on {Mining} {Unstructured} {Data}},
      booktitle = {{20th Working Conference on Reverse Engineering}
        ({WCRE}'13)},
      year      = {2013},
      pages     = {491--492},
      address   = {Koblenz, Germany},
      month     = oct,
    }
  • [DOI] S. Haiduc, G. Bavota, A. Marcus, R. Oliveto, A. De Lucia, and T. Menzies, “Automatic Query Reformulations for Text Retrieval in Software Engineering,” in Proceedings of the 35th International Conference on Software Engineering (ICSE’13), Technical Research Track, San Francisco, CA, USA, 2013, pp. 842-851. Acceptance Rate: 18.5%.
    [Bibtex]
    @InProceedings{Haiduc2013,
      author         = {Haiduc, Sonia and Bavota, Gabriele and Marcus, Andrian
        and Oliveto, Rocco and De Lucia, Andrea and Menzies, T.},
      title          = {{Automatic} {Query} {Reformulations} for {Text}
        {Retrieval} in {Software} {Engineering}},
      booktitle      = {Proceedings of the 35th {International} {Conference}
        on {Software} {Engineering} ({ICSE}'13), {Technical} {Research} {Track}},
      year           = {2013},
      pages          = {842--851},
      address        = {San Francisco, CA, USA},
      month          = may,
      abstract       = {There are more than twenty distinct software
        engineering tasks addressed with text retrieval (TR) techniques, such
        as, traceability link recovery, feature location, refactoring, reuse,
        etc. A common issue with all TR applications is that the results of the
        retrieval depend largely on the quality of the query. When a query
        performs poorly, it has to be reformulated and this is a difficult task
        for someone who had trouble writing a good query in the first place. We
        propose a recommender (called Refoqus) based on machine learning, which
        is trained with a sample of queries and relevant results. Then, for a
        given query, it automatically recommends a reformulation strategy that
        should improve its performance, based on the properties of the query. We
        evaluated Refoqus empirically against four baseline approaches that are
        used in natural language document retrieval. The data used for the
        evaluation corresponds to changes from five open source systems in Java
        and C++ and it is used in the context of TR-based concept location in
        source code. Refoqus outperformed the baselines and its recommendations
        lead to query performance improvement or preservation in 84\% of the
        cases (in average).},
      acceptancerate = {18.5},
      doi            = {10.1109/ICSE.2013.6606630},
      keywords       = {automatic query reformulation, C++, C++ language,
        Context, Engines, feature location, Frequency measurement, Java,
        learning (artificial intelligence), machine learning, Natural languages,
        query formulation, Query Reformulation, recommender system, Recommender
        Systems, Refoqus, Robustness, Software engineering, text retrieval,
        Traceability Link Recovery, Training, Training data},
      timestamp      = {2016.08.31},
    }
  • [DOI] S. Haiduc, G. De Rosa, G. Bavota, R. Oliveto, A. De Lucia, and A. Marcus, “Query Quality Prediction and Reformulation for Source Code Search: The Refoqus Tool,” in Proceedings of the 35th International Conference on Software Engineering (ICSE’13), Formal Demonstrations Track, San Francisco, CA, USA, 2013, pp. 1307-1310. Acceptance Rate: 31%.
    [Bibtex]
    @inproceedings{Haiduc2013a,
      author         = {Haiduc, S. and De Rosa, G. and Bavota, G. and
        Oliveto, R. and De Lucia, A. and Marcus, A.},
      title          = {{Query} {Quality} {Prediction} and {Reformulation}
        for {Source} {Code} {Search}: {The} {Refoqus} {Tool}},
      booktitle      = {Proceedings of the 35th {International} {Conference}
        on {Software} {Engineering} ({ICSE}'13), {Formal} {Demonstrations}
        {Track}},
      year           = {2013},
      pages          = {1307--1310},
      address        = {San Francisco, CA, USA},
      month          = may,
      abstract       = {Developers search source code frequently during
        their daily tasks, to find pieces of code to reuse, to find where to
        implement changes, etc. Code search based on text retrieval (TR)
        techniques has been widely used in the software engineering community
        during the past decade. The accuracy of the TR-based search results
        depends largely on the quality of the query used. We introduce Refoqus,
        an Eclipse plugin which is able to automatically detect the quality of a
        text retrieval query and to propose reformulations for it, when needed,
        in order to improve the results of TR-based code search. A video of
        Refoqus is found online at http://www.youtube.com/watch?v=UQlWGiauyk4.},
      acceptancerate = {31},
      doi            = {10.1109/ICSE.2013.6606704},
      keywords       = {Context, Eclipse plugin, Feature extraction, query
        formulation, Query Quality, query quality prediction, Query
        Reformulation, Refoqus tool, Software engineering, software maintenance,
        Software systems, source code search, text retrieval, text retrieval
        query, text retrieval technique, Training, Training data, TR-based
        search},
      shorttitle     = {Query quality prediction and reformulation for
        source code search},
      timestamp      = {2016.08.31},
    }
  • A. Marcus and S. Haiduc, “Text Retrieval Approaches for Concept Location in Source Code,” In Series Lecture Notes in Computer Science, vol. 7171, pp. 126-158, 2013.
    [Bibtex]
    @InCollection{Marcus2013,
    author = {Marcus, Andrian and Haiduc, Sonia},
    title = {Text {Retrieval} {Approaches} for {Concept} {Location} in
    {Source} {Code}},
    booktitle = {Software {Engineering}},
    year = {2013},
    volume = {7171},
    pages = {126--158},
    copyright = {2013 Springer-Verlag Berlin Heidelberg},
    editor = {De Lucia, Andrea and Ferrucci, Filomena},
    isbn = {978-3-642-36053-4 978-3-642-36054-1},
    keywords = {Concept location, Concern location, feature location,
    information retrieval, Information Storage and Retrieval, Information
    Systems Applications (incl. Internet), Management of Computing and
    Information Systems, Programming Languages, Compilers, Interpreters,
    Programming Techniques, Software engineering, software maintenance},
    language = {en},
    owner = {USER},
    publisher = {Springer Berlin Heidelberg},
    series = {Lecture {Notes} in {Computer} {Science}},
    timestamp = {2016.08.31},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-36054-1_5},
    urldate = {2015-09-18},
    }
  • [DOI] L. Moreno, W. Bandara, S. Haiduc, and A. Marcus, “On the Vocabulary Relationship Between Bug Reports and Source Code,” in Proceedings of the 29th IEEE International Conference on Software Maintenance (ICSM’13), Early Research Achievement Track (ERA), Eindhoven, The Netherlands, 2013, pp. 452-455. Acceptance Rate: 41.4%.
    [Bibtex]
    @InProceedings{Moreno2013,
    author = {Moreno, L. and Bandara, W. and Haiduc, S. and
    Marcus, A.},
    title = {On the {Vocabulary} {Relationship} {Between} {Bug}
    {Reports} and {Source} {Code}},
    booktitle = {Proceedings of the 29th {IEEE} {International}
    {Conference} on {Software} {Maintenance} ({ICSM}'13), {Early} {Research}
    {Achievement} {Track} ({ERA})},
    year = {2013},
    pages = {452--455},
    address = {Eindhoven, The Netherlands},
    month = sep,
    acceptancerate = {41.4},
    doi = {10.1109/ICSM.2013.70},
    issn = {1063-6773},
    keywords = {information retrieval, program debugging, text
    analysis, TR techniques, bug descriptions, bug location techniques, bug
    reports, source code, text retrieval techniques, Art, Computer bugs, Data
    collection, Large scale integration, Software systems, Vocabulary, Bug
    location, source code vocabulary, text retrieval},
    owner = {USER},
    timestamp = {2015.09.18},
    }

2012

  • [DOI] S. Haiduc, G. Bavota, R. Oliveto, A. De Lucia, and A. Marcus, “Automatic Query Performance Assessment During the Retrieval of Software Artifacts,” in Proceedings of the 27th IEEE/ACM International Conference on Automated Software Engineering (ASE’12), Technical Research Track, Essen, Germany, 2012, pp. 90-99. Acceptance Rate: 15%.
    [Bibtex]
    @inproceedings{Haiduc2012,
      author         = {Haiduc, S. and Bavota, G. and Oliveto, R. and De
        Lucia, A. and Marcus, A.},
      title          = {{Automatic} {Query} {Performance} {Assessment}
        {During} the {Retrieval} of {Software} {Artifacts}},
      booktitle      = {Proceedings of the 27th {IEEE}/{ACM} {International}
        {Conference} on {Automated} {Software} {Engineering} ({ASE}'12),
        {Technical} {Research} {Track}},
      year           = {2012},
      pages          = {90--99},
      address        = {Essen, Germany},
      month          = sep,
      abstract       = {Text-based search and retrieval is used by
        developers in the context of many SE tasks, such as, concept location,
        traceability link retrieval, reuse, impact analysis, etc. Solutions for
        software text search range from regular expression matching to complex
        techniques using text retrieval. In all cases, the results of a search
        depend on the query formulated by the developer. A developer needs to
        run a query and look at the results before realizing that it needs
        reformulating. Our aim is to automatically assess the performance of a
        query before it is executed. We introduce an automatic query performance
        assessment approach for software artifact retrieval, which uses 21
        measures from the field of text retrieval. We evaluate the approach in
        the context of concept location in source code. The evaluation shows
        that our approach is able to predict the performance of queries with
        79\% accuracy, using very little training data.},
      acceptancerate = {15},
      doi            = {10.1145/2351676.2351690},
      keywords       = {automatic query performance assessment, Concept
        location, Query performance, query processing, regular expression
        matching, SE tasks, software artifact retrieval, Software engineering,
        software text search, source code, text analysis, text-based retrieval,
        text-based search, text retrieval},
      timestamp      = {2016.08.31},
    }
  • [DOI] S. Haiduc, G. Bavota, R. Oliveto, A. Marcus, and A. De Lucia, “Evaluating the Specificity of Text Retrieval Queries to Support Software Engineering Tasks,” in Proceedings of the 34th International Conference on Software Engineering (ICSE’12), NIER Track, Zurich, Switzerland, 2012, pp. 1273-1276. Acceptance Rate: 18%.
    [Bibtex]
    @inproceedings{Haiduc2012a,
      author         = {Haiduc, S. and Bavota, G. and Oliveto, R. and
        Marcus, A. and De Lucia, A.},
      title          = {{Evaluating} the {Specificity} of {Text} {Retrieval}
        {Queries} to {Support} {Software} {Engineering} {Tasks}},
      booktitle      = {Proceedings of the 34th {International} {Conference}
        on {Software} {Engineering} ({ICSE}'12), {NIER} {Track}},
      year           = {2012},
      pages          = {1273--1276},
      address        = {Zurich, Switzerland},
      month          = jun,
      abstract       = {Text retrieval approaches have been used to address
        many software engineering tasks. In most cases, their use involves
        issuing a textual query to retrieve a set of relevant software artifacts
        from the system. The performance of all these approaches depends on the
        quality of the given query (i.e., its ability to describe the
        information need in such a way that the relevant software artifacts are
        retrieved during the search). Currently, the only way to tell that a
        query failed to lead to the expected software artifacts is by investing
        time and effort in analyzing the search results. In addition, it is
        often very difficult to ascertain what part of the query leads to poor
        results. We propose a novel pre-retrieval metric, which reflects the
        quality of a query by measuring the specificity of its terms. We
        exemplify the use of the new specificity metric on the task of concept
        location in source code. A preliminary empirical study shows that our
        metric is a good effort predictor for text retrieval-based concept
        location, outperforming existing techniques from the field of natural
        language document retrieval.},
      acceptancerate = {18},
      doi            = {10.1109/ICSE.2012.6227101},
      keywords       = {Concept location, Context, Correlation, Entropy,
        information retrieval, Measurement, Natural languages, natural language
        text, preretrieval metric, query processing, Query Quality, Query
        specificity, Software, software artifacts, software engineering tasks,
        software metrics, source code, specificity evaluation, specificity
        metric, text analysis, text retrieval, text retrieval-based concept
        location, text retrieval queries},
      timestamp      = {2016.08.31},
    }
  • S. Haiduc, G. Bavota, R. Oliveto, A. Marcus, and A. De Lucia, “Automatic Query Quality Assessment for the Retrieval of Software Artifacts,” in Workshop on The Next Five Years of Text Analysis in Software Maintenance, Riva del Garda, Italy, 2012.
    [Bibtex]
    @InProceedings{w2012,
    author = {Haiduc, Sonia and Bavota, G. and Oliveto, R. and Marcus,
    Andrian and De Lucia, A.},
    title = {{Automatic} {Query} {Quality} {Assessment} for the
    {Retrieval} of {Software} {Artifacts}},
    booktitle = {{Workshop} on {The} {Next} {Five} {Years} of {Text}
    {Analysis} in {Software} {Maintenance}},
    year = {2012},
    address = {Riva del Garda, Italy},
    month = sep,
    }

2011

  • [DOI] S. L. Abebe, S. Haiduc, P. Tonella, and A. Marcus, “The Effect of Lexicon Bad Smells on Concept Location in Source Code,” in Proceedings of the 11th IEEE International Working Conference on Source Code Analysis and Manipulation (SCAM’11), Technical Research Track, Williamsburg, VA, USA, 2011, pp. 125-134. Acceptance Rate: 31%.
    [Bibtex]
    @InProceedings{Abebe2011,
    author = {Abebe, S. L. and Haiduc, S. and Tonella, P. and
    Marcus, A.},
    title = {The {Effect} of {Lexicon} {Bad} {Smells} on
    {Concept} {Location} in {Source} {Code}},
    booktitle = {Proceedings of the 11th {IEEE} {International}
    {Working} {Conference} on {Source} {Code} {Analysis} and {Manipulation}
    ({SCAM}'11), {Technical} {Research} {Track}},
    year = {2011},
    pages = {125--134},
    address = {Williamsburg, VA, USA},
    month = sep,
    abstract = {Experienced programmers choose identifier names
    carefully, in the attempt to convey information about the role and
    behavior of the labeled code entity in a concise and expressive way. In
    fact, during program understanding the names given to code entities
    represent one of the major sources of information used by developers. We
    conjecture that lexicon bad smells, such as, extreme contractions,
    inconsistent term use, odd grammatical structure, etc., can hinder the
    execution of maintenance tasks which rely on program understanding. We
    propose an approach to determine the extent of this impact and
    instantiate it on the task of concept location. In particular, we
    conducted a study on two open source software systems where we
    investigated how lexicon bad smells affect Information Retrieval-based
    concept location. In this study, the classes changed in response to past
    modification requests are located before and after lexicon bad smells
    are identified and removed from the source code. The results indicate
    that lexicon bad smells impact concept location when using IR-based
    techniques.},
    acceptancerate = {31},
    doi = {10.1109/SCAM.2011.18},
    keywords = {code smells, Computer bugs, Concept location,
    Containers, extreme contraction, Filtering, grammars, identifier name,
    inconsistent term use, information retrieval, labeled code entity,
    lexicon bad smell, lexicon bad smells, Maintenance engineering,
    maintenance task execution, modification request, object-oriented
    programming, object-oriented software system, odd grammatical structure,
    open source software system, program comprehension, program
    understanding, public domain software, reverse engineering, software
    lexicon, software maintenance, Software systems, source code concept
    location, Terminology, text retrieval},
    timestamp = {2016.08.31},
    }
  • [DOI] S. Haiduc, “Automatically Detecting the Quality of the Query and its Implications in IR-Based Concept Location,” in Proceedings of the 26th IEEE/ACM International Conference on Automated Software Engineering (ASE’11), Doctoral Symposium, Lawrence, KS, USA, 2011, pp. 637-640.
    [Bibtex]
    @inproceedings{Haiduc2011a,
      author    = {Haiduc, S.},
      title     = {{Automatically} {Detecting} the {Quality} of the {Query}
        and its {Implications} in {IR}-{Based} {Concept} {Location}},
      booktitle = {Proceedings of the 26th {IEEE}/{ACM} {International}
        {Conference} on {Automated} {Software} {Engineering} ({ASE}'11),
        {Doctoral} {Symposium}},
      year      = {2011},
      pages     = {637--640},
      address   = {Lawrence, KS, USA},
      month     = nov,
      abstract  = {Concept location is an essential task during software
        maintenance and in particular program comprehension activities. One of
        the approaches to this task is based on leveraging the lexical
        information found in the source code by means of Information Retrieval
        techniques. All IR-based approaches to concept location are highly
        dependent on the queries written by the users. An IR approach, even
        though good on average, might fail when the input query is poor.
        Currently there is no way to tell when a query leads to poor results for
        IR-based concept location, unless a considerable effort is put into
        analyzing the results after the fact. We propose an approach based on
        recent advances in the field of IR research, which aims at automatically
        determining the difficulty a query poses to an IR-based concept location
        technique. We plan to evaluate several models and relate them to IR
        performance metrics.},
      doi       = {10.1109/ASE.2011.6100144},
      keywords  = {Concept location, Conferences, Correlation, Estimation,
        information retrieval, information retrieval-based concept location,
        lexical information, Measurement, Prediction algorithms, program
        comprehension, program comprehension activities, query, query
        processing, Query Quality, search, search engines, software maintenance,
        source code},
      owner     = {USER},
      timestamp = {2016.08.31},
    }
  • [DOI] S. Haiduc and A. Marcus, “On the Effect of the Query in IR-based Concept Location,” in Proceedings of the 19th IEEE International Conference on Program Comprehension (ICPC’11), Student Symposium, Kingston, Canada, 2011, pp. 234-237.
    [Bibtex]
    @InProceedings{Haiduc2011,
    author = {Haiduc, S. and Marcus, A.},
    title = {On the {Effect} of the {Query} in {IR}-based {Concept}
    {Location}},
    booktitle = {Proceedings of the 19th {IEEE} {International}
    {Conference} on {Program} {Comprehension} ({ICPC}'11), {Student}
    {Symposium}},
    year = {2011},
    pages = {234--237},
    address = {Kingston, Canada},
    month = jun,
    abstract = {Concept location is an essential task during software
    maintenance and in particular program comprehension activities. One of
    the approaches to this task is based on leveraging the lexical
    information found in the source code by means of Information Retrieval
    techniques. All IR-based approaches to concept location are highly
    dependent on the queries written by the users. An IR approach, even
    though good on average, might fail when the input query is poor.
    Currently there is no way to tell when a query leads to poor results for
    IR-based concept location, unless a considerable effort is put into
    analyzing the results after the fact. We propose an approach based on
    recent advances in the field of IR research, which aims at automatically
    determining the difficulty a query poses to an IR-based concept location
    technique. We plan to evaluate several models and relate them to IR
    performance metrics.},
    doi = {10.1109/ICPC.2011.48},
    keywords = {Concept location, Conferences, Correlation, Estimation,
    information retrieval, information retrieval techniques, IR-based
    concept location, IR performance metrics, lexical information,
    Measurement, Prediction algorithms, program comprehension, program
    comprehension activity, query, query processing, search, search engines,
    software maintenance, software metrics, software performance evaluation,
    source code},
    timestamp = {2016.08.31},
    }

2010

  • [DOI] S. Haiduc, J. Aponte, and A. Marcus, “Supporting Program Comprehension with Source Code Summarization,” in Proceedings of the 32nd ACM/IEEE International Conference on Software Engineering (ICSE’10), NIER Track, Cape Town, South Africa, 2010, pp. 223-226. Acceptance Rate: 25%.
    [Bibtex]
    @inproceedings{Haiduc2010,
      author         = {Haiduc, S. and Aponte, J. and Marcus, A.},
      title          = {{Supporting} {Program} {Comprehension} with {Source}
        {Code} {Summarization}},
      booktitle      = {Proceedings of the 32nd {ACM}/{IEEE} {International}
        {Conference} on {Software} {Engineering} ({ICSE}'10), {NIER} {Track}},
      year           = {2010},
      volume         = {2},
      pages          = {223--226},
      address        = {Cape Town, South Africa},
      month          = may,
      abstract       = {One of the main challenges faced by today's
        developers is keeping up with the staggering amount of source code that
        needs to be read and understood. In order to help developers with this
        problem and reduce the costs associated with it, one solution is to use
        simple textual descriptions of source code entities that developers can
        grasp easily, while capturing the code semantics precisely. We propose
        an approach to automatically determine such descriptions, based on
        automated text summarization technology.},
      acceptancerate = {25},
      doi            = {10.1145/1810295.1810335},
      keywords       = {automated text summarization, code semantics, cost
        reduction, Large scale integration, Natural languages, program
        comprehension, reverse engineering, Semantics, software cost estimation,
        Software engineering, software maintenance, Software systems, Source
        code summarization, summary, Tagging, text summarization, textual
        description},
      timestamp      = {2016.08.31},
    }
  • [DOI] S. Haiduc, J. Aponte, L. Moreno, and A. Marcus, “On the Use of Automated Text Summarization Techniques for Summarizing Source Code,” in Proceedings of the 17th Working Conference on Reverse Engineering (WCRE’10), Technical Research Track, Beverly, MA, USA, 2010, pp. 35-44. Acceptance Rate: 31%.
    [Bibtex]
    @inproceedings{Haiduc2010a,
      author         = {Haiduc, S. and Aponte, J. and Moreno, L. and Marcus,
        A.},
      title          = {On the {Use} of {Automated} {Text} {Summarization}
        {Techniques} for {Summarizing} {Source} {Code}},
      booktitle      = {Proceedings of the 17th {Working} {Conference} on
        {Reverse} {Engineering} ({WCRE}'10), {Technical} {Research} {Track}},
      year           = {2010},
      pages          = {35--44},
      address        = {Beverly, MA, USA},
      month          = oct,
      abstract       = {During maintenance developers cannot read the entire
        code of large systems. They need a way to get a quick understanding of
        source code entities (such as, classes, methods, packages, etc.), so
        they can efficiently identify and then focus on the ones related to
        their task at hand. Sometimes reading just a method header or a class
        name does not tell enough about its purpose and meaning, while reading
        the entire implementation takes too long. We study a solution which
        mitigates the two approaches, i.e., short and accurate textual
        descriptions that illustrate the software entities without having to
        read the details of the implementation. We create such descriptions
        using techniques from automatic text summarization. The paper presents a
        study that investigates the suitability of various such techniques for
        generating source code summaries. The results indicate that a
        combination of text summarization techniques is most appropriate for
        source code summarization and that developers generally agree with the
        summaries produced.},
      acceptancerate = {31},
      doi            = {10.1109/WCRE.2010.13},
      keywords       = {automated text summarization technique, Computer
        science, Correlation, Large scale integration, Lead, program
        comprehension, Semantics, software entity, software maintenance,
        Software systems, Source code summarization, text summarization, textual
        description},
      timestamp      = {2016.08.31},
    }

2009

  • [DOI] S. L. Abebe, S. Haiduc, A. Marcus, P. Tonella, and G. Antoniol, “Analyzing the Evolution of the Source Code Vocabulary,” in Proceedings of the 13th European Conference on Software Maintenance and Reengineering (CSMR ’09), Technical Research Track, Kaiserslautern, Germany, 2009, pp. 189-198. Acceptance Rate: 30%.
    [Bibtex]
    @InProceedings{Abebe2009a,
    author = {Abebe, S. L. and Haiduc, S. and Marcus, A. and
    Tonella, P. and Antoniol, G.},
    title = {Analyzing the {Evolution} of the {Source} {Code}
    {Vocabulary}},
    booktitle = {Proceedings of the 13th {European} {Conference} on
    {Software} {Maintenance} and {Reengineering} ({CSMR} '09), {Technical}
    {Research} {Track}},
    year = {2009},
    pages = {189--198},
    address = {Kaiserslautern, Germany},
    month = mar,
    acceptancerate = {30},
    doi = {10.1109/CSMR.2009.61},
    issn = {1534-5351},
    keywords = {software maintenance, programming language
    grammar, software artifact, software systems, source code, source code
    vocabulary, Computer languages, Computer science, Guidelines, Information
    analysis, Knowledge management, Software engineering, Software
    maintenance, Software systems, Vocabulary, Writing, Lexicon
    evolution, Software vocabulary, Text mining},
    timestamp = {2015.09.18},
    }
  • [DOI] S. L. Abebe, S. Haiduc, P. Tonella, and A. Marcus, “Lexicon Bad Smells in Software,” in Proceedings of the 16th Working Conference on Reverse Engineering (WCRE ’09), Technical Research Track, Lille, France, 2009, pp. 95-99. Acceptance Rate: 25%.
    [Bibtex]
    @InProceedings{Abebe2009,
    author = {Abebe, S. L. and Haiduc, S. and Tonella, P. and
    Marcus, A.},
    title = {Lexicon {Bad} {Smells} in {Software}},
    booktitle = {Proceedings of the 16th {Working} {Conference} on
    {Reverse} {Engineering} ({WCRE} '09), {Technical} {Research} {Track}},
    year = {2009},
    pages = {95--99},
    address = {Lille, France},
    month = oct,
    abstract = {We introduce the notion of "lexicon bad smell",
    which parallels that of "code smell" and indicates some potential
    lexicon construction problems that can be addressed through refactoring
    (e.g., renaming). We created a catalog of lexicon bad smells and we
    developed a publicly available suite of detectors to locate them. The
    paper presents a case study in which we used the detectors on two
    open-source systems. The study revealed the main challenges faced in
    detecting the lexicon bad smells.},
    acceptancerate = {25},
    doi = {10.1109/WCRE.2009.26},
    keywords = {Computer science, Detectors, Documentation, Face
    detection, lexicon construction problem, Open source software,
    open-source system, programming environments, Programming profession,
    public domain software, reverse engineering, software lexicon bad smell
    catalog, software maintenance, software refactoring, source code smell,
    Speech, Terminology},
    timestamp = {2016.08.31},
    }
  • [DOI] G. Gay, S. Haiduc, A. Marcus, and T. Menzies, “On the Use of Relevance Feedback in IR-Based Concept Location,” in Proceedings of the 25th IEEE International Conference on Software Maintenance (ICSM’09), Technical Research Track, Edmonton, Canada, 2009, pp. 351-360. Acceptance Rate: 22%.
    [Bibtex]
    @InProceedings{Gay2009,
    author = {Gay, G. and Haiduc, S. and Marcus, A. and Menzies,
    T.},
    title = {On the {Use} of {Relevance} {Feedback} in
    {IR}-{Based} {Concept} {Location}},
    booktitle = {Proceedings of the 25th {IEEE} {International}
    {Conference} on {Software} {Maintenance} ({ICSM}'09), {Technical}
    {Research} {Track}},
    year = {2009},
    pages = {351--360},
    address = {Edmonton, Canada},
    month = sep,
    abstract = {Concept location is a critical activity during
    software evolution as it produces the location where a change is to
    start in response to a modification request, such as, a bug report or a
    new feature request. Lexical-based concept location techniques rely on
    matching the text embedded in the source code to queries formulated by
    the developers. The efficiency of such techniques is strongly dependent
    on the ability of the developer to write good queries. We propose an
    approach to augment information retrieval (IR) based concept location
    via an explicit relevance feedback (RF) mechanism. RF is a two-part
    process in which the developer judges existing results returned by a
    search and the IR system uses this information to perform a new search,
    returning more relevant information to the user. A set of case studies
    performed on open source software systems reveals the impact of RF on IR
    based concept location.},
    acceptancerate = {22},
    doi = {10.1109/ICSM.2009.5306315},
    keywords = {Computer science, Humans, information retrieval,
    Internet, lexical-based concept location, Open source software, query
    processing, query writing, Radio frequency, relevance feedback, search
    engines, Software engineering, software evolution, software maintenance,
    Software systems, State feedback},
    timestamp = {2016.08.31},
    }

2008

  • [DOI] S. Haiduc and A. Marcus, “On the Use of Domain Terms in Source Code,” in Proceedings of the 16th IEEE International Conference on Program Comprehension (ICPC’08), Technical Research Track, Amsterdam, The Netherlands, 2008, pp. 113-122. Acceptance Rate: 35%.
    [Bibtex]
    @InProceedings{Haiduc2008,
    author = {Haiduc, S. and Marcus, A.},
    title = {On the {Use} of {Domain} {Terms} in {Source}
    {Code}},
    booktitle = {Proceedings of the 16th {IEEE} {International}
    {Conference} on {Program} {Comprehension} ({ICPC}'08), {Technical}
    {Research} {Track}},
    year = {2008},
    pages = {113--122},
    address = {Amsterdam, The Netherlands},
    month = jun,
    abstract = {Information about the problem domain of the software
    and the solution it implements is often embedded by developers in
    comments and identifiers. When using software developed by others or
    when are new to a project, programmers know little about how domain
    information is reflected in the source code. Programmers often learn
    about the domain from external sources such as books, articles, etc.
    Hence, it is important to use in comments and identifiers terms that are
    commonly known in the domain literature, as it is likely that
    programmers will use such terms when searching the source code. The
    paper presents a case study that investigated how domain terms are used
    in comments and identifiers. The study focused on three research
    questions: (1) to what degree are domain terms found in the source code
    of software from a particular problem domain?; (2) which is the
    preponderant source of domain terms: identifiers or comments?; and (3)
    to what degree are domain terms shared between several systems from the
    same problem domain? Within the studied software, we found that in
    average: 42\% of the domain terms were used in the source code; 23\% of
    the domain terms used in the source code are present in comments only,
    whereas only 11\% in the identifiers alone, and there is a 63\%
    agreement in the use of domain terms between any two software systems.},
    acceptancerate = {35},
    doi = {10.1109/ICPC.2008.29},
    keywords = {Books, comments, Computer science, domain terms,
    Embedded software, Graphics, Graph theory, identifiers, Open source
    software, Programming profession, software development, Software
    engineering, Software libraries, Software systems, source code,
    Vocabulary},
    timestamp = {2016.08.31},
    }