﻿<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">Explor Digit Health Technol</journal-id>
<journal-id journal-id-type="publisher-id">EDHT</journal-id>
<journal-title-group>
<journal-title>Exploration of Digital Health Technologies</journal-title>
</journal-title-group>
<issn pub-type="epub">2996-9409</issn>
<publisher>
<publisher-name>Open Exploration Publishing</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.37349/edht.2024.00032</article-id>
<article-id pub-id-type="manuscript">101132</article-id>
<article-categories>
<subj-group>
<subject>Original Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Assessing the accuracy and readability of ChatGPT-4 and Gemini in answering oral cancer queries—an exploratory study</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-5349-1091</contrib-id>
<name>
<surname>Diniz-Freitas</surname>
<given-names>Márcio</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing—original draft</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<xref ref-type="aff" rid="I1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="I2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="cor1">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-5727-0920</contrib-id>
<name>
<surname>López-Pintor</surname>
<given-names>Rosa María</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<xref ref-type="aff" rid="I3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-2040-6617</contrib-id>
<name>
<surname>Santos-Silva</surname>
<given-names>Alan Roger</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<xref ref-type="aff" rid="I4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-2103-0746</contrib-id>
<name>
<surname>Warnakulasuriya</surname>
<given-names>Saman</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<xref ref-type="aff" rid="I5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="I6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-1483-401X</contrib-id>
<name>
<surname>Diz-Dios</surname>
<given-names>Pedro</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<xref ref-type="aff" rid="I1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="I2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="editor">
<name>
<surname>Pandey</surname>
<given-names>Gaurav</given-names>
</name>
<role>Academic Editor</role>
<aff>Icahn School of Medicine at Mount Sinai, USA</aff>
</contrib>
</contrib-group>
<aff id="I1">
<sup>1</sup>Medical-Surgical Dentistry Research Group (OMEQUI), Health Research Institute of Santiago de Compostela (IDIS) and University of Santiago de Compostela (USC), 15782 Santiago de Compostela, Spain</aff>
<aff id="I2">
<sup>2</sup>Special Care Dentistry Unit, School of Medicine and Dentistry, University of Santiago de Compostela (USC), 15782 Santiago de Compostela, Spain</aff>
<aff id="I3">
<sup>3</sup>ORALMED Research Group, Department of Dental Clinical Specialties, School of Dentistry, Complutense University, 28040 Madrid, Spain</aff>
<aff id="I4">
<sup>4</sup>Department of Oral Diagnosis, Piracicaba Dental School, University of Campinas (UNICAMP), Piracicaba 13414-903, Brazil</aff>
<aff id="I5">
<sup>5</sup>Faculty of Dentistry, Oral and Craniofacial Sciences, King’s College London, WC2R 2LS London, UK</aff>
<aff id="I6">
<sup>6</sup>WHO Collaborating Centre for Oral Cancer, King’s College London, WC2R 2LS London, UK</aff>
<author-notes>
<corresp id="cor1">
<bold>
<sup>*</sup>Correspondence:</bold> Márcio Diniz-Freitas, Special Care Dentistry Unit, School of Medicine and Dentistry, University of Santiago de Compostela (USC), 15706 Santiago de Compostela, Spain. <email>marcio.diniz@usc.es</email></corresp>
</author-notes>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<pub-date pub-type="epub">
<day>19</day>
<month>11</month>
<year>2024</year>
</pub-date>
<volume>2</volume>
<issue>6</issue>
<fpage>334</fpage>
<lpage>345</lpage>
<history>
<date date-type="received">
<day>27</day>
<month>05</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>11</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>© The Author(s) 2024.</copyright-statement>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This is an Open Access article licensed under a Creative Commons Attribution 4.0 International License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, sharing, adaptation, distribution and reproduction in any medium or format, for any purpose, even commercially, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Aim:</title>
<p id="absp-1">This study aims to evaluate the accuracy and readability of responses generated by two large language models (LLMs) (ChatGPT-4 and Gemini) to frequently asked questions by lay persons (the general public) about signs and symptoms, risk factors, screening, diagnosis, treatment, prevention, and survival in relation to oral cancer.</p>
</sec>
<sec>
<title>Methods:</title>
<p id="absp-2">The accuracy of each response given in the two LLMs was rated by four oral cancer experts, blinded to the source of the responses. The accuracy was rated as 1: complete, 2: correct but insufficient, 3: includes correct and incorrect/outdated information, and 4: completely incorrect. Frequency, mean scores for each question, and overall were calculated. Readability was analyzed using the Flesch Reading Ease and the Flesch-Kincaid Grade Level (FKGL) tests.</p>
</sec>
<sec>
<title>Results:</title>
<p id="absp-3">The mean accuracy scores for ChatGPT-4 responses ranged from 1.00 to 2.00, with an overall mean score of 1.50 (SD 0.36), indicating that responses were usually correct but sometimes insufficient. Gemini responses had mean scores ranging from 1.00 to 1.75, with an overall mean score of 1.20 (SD 0.27), suggesting more complete responses. The Mann-Whitney <italic>U</italic> test revealed a statistically significant difference between the models’ scores (<italic>p</italic> = 0.02), with Gemini outperforming ChatGPT-4 in terms of completeness and accuracy. ChatGPT generally produces content at a lower grade level (average FKGL: 10.3) compared to Gemini (average FKGL: 12.3) (<italic>p</italic> = 0.004).</p>
</sec>
<sec>
<title>Conclusions:</title>
<p id="absp-4">Gemini provides more complete and accurate responses to questions about oral cancer that lay people may seek answers to compared to ChatGPT-4, although its responses were less readable. Further improvements in model training and evaluation consistency are needed to enhance the reliability and utility of LLMs in healthcare settings.</p>
</sec>
</abstract>
<kwd-group>
<kwd>Large language models</kwd>
<kwd>ChatGPT</kwd>
<kwd>Gemini</kwd>
<kwd>oral cancer</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p id="p-1">The potential of artificial intelligence (AI) in dentistry, particularly in oral medicine, is gaining importance [<xref ref-type="bibr" rid="B1">1</xref>]. The recently launched ChatGPT, an AI tool developed by OpenAI, is a model trained on large amounts of data capable of understanding and generating human language with high precision and consistency. On March 14, 2023, OpenAI introduced ChatGPT-4, an updated subscription-based model claiming better performance compared to ChatGPT-3.5, including interpreting images, responding to questions about their content, and generating context-specific descriptions (OpenAI, <ext-link xlink:href="https://cdn.openai.com/papers/GPTV_System_Card.pdf" ext-link-type="uri">https://cdn.openai.com/papers/GPTV_System_Card.pdf</ext-link>).</p>
<p id="p-2">As AI large language models (LLMs) like ChatGPT gained popularity, other models were also developed. Bard, the predecessor of Gemini, was introduced by Google in 2023 as a conversational AI chatbot. Despite its capability to generate text, translate languages, and create various types of creative content, Bard’s development focused primarily on providing informative answers. In 2024, Google rebranded Bard as Gemini, updating it with enhanced abilities to comprehend and respond to complex questions, generate different text formats, and translate languages more accurately and naturally. Gemini represents a significant step in the evolution of AI chatbots (<ext-link xlink:href="https://deepmind.google/technologies/gemini/" ext-link-type="uri">https://deepmind.google/technologies/gemini/</ext-link>).</p>
<p id="p-3">Researchers from different medical specialties have started exploring the utility of LLMs, evaluating especially their accuracy in answering specialty-specific questions [<xref ref-type="bibr" rid="B2">2</xref>]. Prior to or during the time of hospital visits, patients attending Oral Medicine clinics may also consult these LLMs to self-assess their conditions, find answers to queries they have, and further understand the treatment plan proposed by the clinician and any recommended procedures, or to familiarize themselves with what other experts say about their condition and treatments. Additionally, LLMs support multilingual communication through language translation. As LLMs expand and become further customized for oral medicine, they could enhance patient care, improve information accessibility, and contribute to advancements in this dental specialty [<xref ref-type="bibr" rid="B3">3</xref>]. Their potential applications in oral cancer span a wide spectrum, from enhancing preventive strategies to assisting in understanding treatment complications.</p>
<p id="p-4">This study aimed to evaluate the accuracy and readability of responses from two LLMs (ChatGPT-4 and Gemini) to frequently asked questions by lay persons (the general public) who are searching for information and answers to questions about oral cancer. These individuals are typically members of the general public, including those who may be personally affected by oral cancer (and being seen in clinics) or who are seeking information out of concern for themselves or loved ones about signs and symptoms, risk factors, screening and diagnosis, treatment, and survival in relation to oral cancer.</p>
</sec>
<sec id="s2">
<title>Materials and methods</title>
<sec id="t2-1">
<title>Selection of questions</title>
<p id="p-5">The summary of the study workflow is shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>. To identify possible queries from non-experts in oral cancer, a Google search using the keywords “frequently asked questions about oral cancer” AND “common oral cancer questions” was conducted in March 2024 with the virtual private network (VPN) disabled. The search yielded a number of queries based on patient guidelines developed by institutions, hospitals, and scientific societies like the Mayo Clinic, National Institute of Dental and Craniofacial Research, American Dental Association (ADA), American Head and Neck Society, Moffitt Cancer Center, the American College of Prosthodontists, and British Columbia Cancer Center. Two researchers (MDF, PDD) selected 15 questions covering various topics, including oral cancer detection, signs and symptoms, screening and diagnosis, treatment, treatment complications, prognosis, and survival (<xref ref-type="table" rid="t1">Table 1</xref>). These questions were entered into ChatGPT-4 (April 23, 2023 version, OpenAI) and Google Gemini AI (Google) using the “New Chat” function for each question.</p>
<fig id="fig1" position="float">
<label>Figure 1</label>
<caption>
<p id="fig1-p-1">
<bold>Study workflow.</bold> FKGL: Flesch-Kincaid Grade Level; FRE: Flesch Reading Ease; *: 1: complete, 2: correct but insufficient, 3: includes correct and incorrect/outdated information, 4: completely incorrect</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="edht-02-101132-g001.tif" />
</fig>
<table-wrap id="t1">
<label>Table 1</label>
<caption>
<p id="t1-p-1">
<bold>Frequently asked questions about oral cancer</bold>
</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>
<bold>Questions about oral cancer</bold>
</th>
</tr>
</thead>
<tbody>
<tr>
<td>1. What is oral cancer screening?<break />2. How often should I have an oral cancer screen?<break />3. What are the benefits of oral cancer screening?<break />4. How long does oral cancer screening take?<break />5. How common is oral cancer?<break />6. What are the early signs and symptoms of oral cancer?<break />7. What are the early signs of lip cancer?<break />8. How is oral cancer diagnosed?<break />9. What are the risk factors for developing oral cancer?<break />10. How could one prevent oral cancer?<break />11. What treatment options are available for oral cancer?<break />12. What are the side effects of oral cancer treatments?<break />13. What is the prognosis and survival rate for oral cancer?<break />14. How can lifestyle changes impact the progression or recurrence of oral cancer?<break />15. What follow-up care is necessary after oral cancer treatment?</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="t2-2">
<title>Evaluation of responses</title>
<p id="p-6">The accuracy of each response was rated using the following scores: 1: complete, 2: correct but insufficient, 3: includes correct and incorrect/outdated information, 4: completely incorrect [<xref ref-type="bibr" rid="B4">4</xref>]. The evaluation was performed by four experts (PDD, RMLP, ARS, and SW) who have experience in managing oral cancer patients. These experts were selected based on their specific postgraduate training in oral medicine, their authorship of numerous research articles on oral cancer, and their more than 20 years of clinical experience in the field. They were unaware of the source of the responses. For each response, the scores of the four experts were averaged to obtain a mean accuracy score, and an overall mean score was then calculated. Additionally, the concordance among the evaluators was calculated as a percentage.</p>
<p id="p-7">Readability was assessed using the Flesch Reading Ease (FRE) and the Flesch-Kincaid Grade Level (FKGL) indices, which evaluate readability by incorporating average sentence length and syllables per word. FRE scores range from 0 to 100, with scores above 80 indicating conversational English. FKGL scores indicate the approximate USA education level needed to understand the text. The analysis used an online free-access readability scoring tool (<ext-link xlink:href="https://readabilityformulas.com/" ext-link-type="uri">https://readabilityformulas.com/</ext-link>). Ethical approval was not necessary as no human or animal subjects were involved in the study.</p>
</sec>
<sec id="t2-3">
<title>Statistical analysis</title>
<p id="p-8">Each of the 4 experts rated the 15 responses provided by each LLM, resulting in a total of 60 evaluations per LLM. Frequency, mean scores, and standard deviation (SD) for each question and overall were calculated. The Shapiro-Wilk test was used to determine the normality of the distribution of the continuous variables. The non-parametric Mann-Whitney <italic>U</italic> test was used to evaluate any differences between the mean scores of ChatGPT-4 and Gemini. Additionally, the Student’s <italic>t</italic>-test for independent samples was used to analyze possible differences in readability scores between ChatGPT-4 and Gemini responses. Statistical significance was set at <italic>p</italic> ≤ 0.05.</p>
</sec>
</sec>
<sec id="s3">
<title>Results</title>
<sec id="t3-1">
<title>ChatGPT-4 answers</title>
<p id="p-9">The distribution of response accuracy scores for ChatGPT-4 is shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>. Agreement rate on the scores among researchers was 71.6%. Most responses (65%) received the highest score (Score 1), while Scores 2 and 3 were given to 20% and 15% of responses, respectively. No response was rated as completely incorrect (Score 4). These results indicate predominantly positive evaluations of the responses by this LLM, suggesting that although generally accurate and relevant, there is still room for improvement in detail and information currency.</p>
<fig id="fig2" position="float">
<label>Figure 2</label>
<caption>
<p id="fig2-p-1">
<bold>Distribution of scores assigned by the four evaluators for each response provided by ChatGPT-4</bold>
</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="edht-02-101132-g002.tif" />
</fig>
<p id="p-10">The mean score of responses obtained via ChatGPT-4 was 1.50 (SD 0.36), indicating that on average, ChatGPT-4 responses are correct but, on several counts, insufficient (<xref ref-type="fig" rid="fig3">Figure 3</xref>). Most responses (65%) received a score of 1, 20% received a score of 2, and 15% received a score of 3, with none receiving a score of 4. The scores suggest that while some responses are complete, there are areas where the provided information could be improved in accuracy and comprehensiveness.</p>
<fig id="fig3" position="float">
<label>Figure 3</label>
<caption>
<p id="fig3-p-1">
<bold>Mean scores of responses obtained via ChatGPT-4</bold>
</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="edht-02-101132-g003.tif" />
</fig>
</sec>
<sec id="t3-2">
<title>Gemini answers</title>
<p id="p-11">The distribution of response accuracy scores for Gemini is shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>. The mean score of responses obtained via Gemini was 1.20 (SD 0.27). Agreement rate among evaluators was 83.3%. Most responses (80%) received the highest score (Score 1), while Score 2 was assigned to the remaining 20% of responses. No Scores 3 or 4 were assigned. These results indicate a high level of satisfaction with Gemini’s responses, with no reports of incorrect information.</p>
<fig id="fig4" position="float">
<label>Figure 4</label>
<caption>
<p id="fig4-p-1">
<bold>Distribution of scores assigned by the four evaluators for each response provided by Gemini</bold>
</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="edht-02-101132-g004.tif" />
</fig>
<p id="p-12">The overall mean score of 1.20 (SD 0.27) indicates that Gemini responses are generally more often complete than insufficient (<xref ref-type="fig" rid="fig5">Figure 5</xref>). Specifically, 11 of the 15 questions received a mean score of 1.00.</p>
<fig id="fig5" position="float">
<label>Figure 5</label>
<caption>
<p id="fig5-p-1">
<bold>Mean scores of responses obtained via Gemini</bold>
</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="edht-02-101132-g005.tif" />
</fig>
<p id="p-13">Gemini appears to provide more complete and accurate responses compared to ChatGPT-4 (<italic>p</italic> = 0.02).</p>
</sec>
<sec id="t3-3">
<title>Readability scores</title>
<p id="p-14">FRE scores averaged 46.8 for ChatGPT and 42.2 for Gemini, suggesting that ChatGPT responses were generally easier to understand than those provided by Gemini, although this difference was not statistically significant (<italic>p</italic> = 0.200). Statistical analysis showed significant differences in FKGL scores between the two LLMs (<italic>p</italic> = 0.004), indicating that ChatGPT generally produces content at a lower grade level (average FKGL: 10.3) compared to Gemini (average FKGL: 12.3). This suggests that ChatGPT content is more accessible to a broader audience.</p>
</sec>
</sec>
<sec id="s4">
<title>Discussion</title>
<p id="p-15">The internet has transformed how patients access medical information, reconfiguring the dynamics of patient empowerment and communication with physicians. However, while free access to medical knowledge can be helpful in certain circumstances, not all online sources are reliable, and the general public and patients may encounter incorrect or misleading information leading to a state of confusion or self-misdiagnosis [<xref ref-type="bibr" rid="B5">5</xref>].</p>
<p id="p-16">Cancer patients face numerous challenges throughout their cancer journey, ranging from emotional stress and functional disability to treatment-related side effects, and must also assimilate complex medical information [<xref ref-type="bibr" rid="B6">6</xref>]. Patient education and support are crucial components of comprehensive oral cancer care, establishing a solid foundation for patients to actively participate in their treatment plans and face the associated challenges [<xref ref-type="bibr" rid="B7">7</xref>].</p>
<p id="p-17">Many patients report that the information provided by healthcare professionals is not always clear and that at times they feel uncertain about asking questions of their healthcare provider. As a result, they often turn to the Internet to search for health-related information [<xref ref-type="bibr" rid="B8">8</xref>].</p>
<p id="p-18">One of the most promising applications of AI in medicine is the development of conversational agents (chatbots) that provide information and support to the general public and patients to manage their health conditions [<xref ref-type="bibr" rid="B9">9</xref>]. Johnson et al. (2023) [<xref ref-type="bibr" rid="B10">10</xref>] found that ChatGPT provides accurate information about common cancer myths and misconceptions similar to that provided on the National Cancer Institute (NCI) webpage.</p>
<p id="p-19">Considering the complex nature of oral cancer, patient education and awareness are paramount. However, online information about oral cancer was shown to be of poor quality [<xref ref-type="bibr" rid="B11">11</xref>] and difficult to read even before the advent of ChatGPT [<xref ref-type="bibr" rid="B12">12</xref>]. Alcaide-Raya et al. (2010) [<xref ref-type="bibr" rid="B13">13</xref>] found that information on oral premalignant disorders could at times be confusing to lay persons as many web pages appeared to be designed primarily for the healthcare professional rather than for the general public. Hassona et al. (2024) [<xref ref-type="bibr" rid="B14">14</xref>] evaluated ChatGPT’s effectiveness in providing information on the early detection of oral cancer using 108 questions from expert sources. ChatGPT responses were rated for quality, reliability, readability, and usefulness, with most responses considered very useful and scoring high in quality and reliability. However, there were concerns about readability and actionable information, indicating that while ChatGPT is a promising resource for informing patients, it still needs improvements to be fully resourceful in this area [<xref ref-type="bibr" rid="B14">14</xref>]. Providing comprehensive education before, during, and after cancer treatment has been demonstrated to enhance quality of life, decrease the frequency of emergency department visits, and minimize hospital admissions [<xref ref-type="bibr" rid="B15">15</xref>].</p>
<p id="p-20">In a recent review of generative AI applications in healthcare, Moulaei et al. (2024) [<xref ref-type="bibr" rid="B2">2</xref>] found that ChatGPT and Google Bard (Gemini) were the most used LLMs. The study identified 24 different applications of generative AI in healthcare, with the most common being providing information about health conditions through question responses and disease diagnosis and prediction [<xref ref-type="bibr" rid="B2">2</xref>].</p>
<p id="p-21">Giannakopoulos et al. (2023) [<xref ref-type="bibr" rid="B16">16</xref>] compared responses from four LLMs (Bard Google LLC, ChatGPT-3.5, ChatGPT-4 OpenAI, and Bing Chat Microsoft Corp) to clinically relevant questions in dentistry. While ChatGPT-4 statistically outperformed ChatGPT-3.5, Bing Chat, and Bard, all models occasionally exhibited inaccuracies, generalities, outdated content, and a lack of source references. Evaluators noted instances where LLMs provided irrelevant information, vague responses, or information that was not entirely accurate [<xref ref-type="bibr" rid="B16">16</xref>].</p>
<p id="p-22">In evaluating the accuracy of LLMs in answering questions about oral cancer, it is crucial to recognize the differences in their design and training focus. ChatGPT excels in generating coherent and contextually relevant responses based on vast textual data, making it effective in providing information on well-established medical knowledge [<xref ref-type="bibr" rid="B17">17</xref>]. In contrast, Gemini (previously Google Bard) might synthesize information across modalities, such as integrating textual and visual data, which could offer more nuanced or interdisciplinary insights; moreover, Gemini benefits from accessing the most current data, offering users more timely and relevant information [<xref ref-type="bibr" rid="B18">18</xref>]. However, both models’ accuracy in the domain of oral cancer is fundamentally dependent on the quality and scope of the data they were trained on, and ongoing validation against clinical guidelines and expert consensus remains essential for reliable application in healthcare contexts.</p>
<p id="p-23">The challenge in the readability of chatbot responses may hinder their accessibility and ease of use by the general public. Although the readability scores suggested that ChatGPT’s responses were generally easier to understand than those provided by Gemini, on the FRE scale, ChatGPT’s content falls into the “fairly difficult” range, typically suited for readers at a 10th to 12th-grade level, while Gemini’s content, being slightly more difficult, aligns with a similar range but closer to a more challenging level. On the other hand, FKGL scores revealed a significant difference between the two models. ChatGPT generally produces content at a lower grade level, making it more accessible to high school students, while Gemini’s content is written at a higher grade level, more appropriate for early college students. This indicates that ChatGPT’s content is more accessible to a broader audience, whereas Gemini’s content requires a higher level of reading proficiency.</p>
<p id="p-24">To date, there is a limited body of research comparing the readability of responses generated by both models within the field of dentistry. De Souza et al. (2024) [<xref ref-type="bibr" rid="B19">19</xref>] assessed the performance of several LLMs including ChatGPT-3.5, ChatGPT-4, and Google Bard (Gemini) in answering 10 questions covering a wide range of topics relevant to head and neck cancers, sourced from the NCI. The results showed that Google Bard (Gemini) aligned more closely with the NCI’s readability standards, suggesting its effectiveness in delivering detailed yet accessible information [<xref ref-type="bibr" rid="B19">19</xref>].</p>
<p id="p-25">On the other hand, Dursun and Bilici Geçer (2024) [<xref ref-type="bibr" rid="B20">20</xref>] evaluated the readability of responses generated by ChatGPT-3.5, ChatGPT-4, Gemini, and Copilot concerning orthodontic clear aligners. Their results indicate that Gemini’s responses are notably more readable compared to those of the other chatbots [<xref ref-type="bibr" rid="B20">20</xref>]. This discrepancy highlights that the readability of AI-generated content may vary depending on the specific context and subject matter, suggesting that further research is needed to fully understand the factors influencing these differences in readability across different applications.</p>
<p id="p-26">In the context of patient-centered care, which emphasizes the significance of a well-informed patient actively engaging in decision-making, the provision of effective information is crucial. An important factor to consider is ensuring that it is comprehensible to the intended population [<xref ref-type="bibr" rid="B21">21</xref>]. This is particularly relevant in the context of oral cancer, since social determinants of health, such as low socioeconomic status, including low educational attainment, are strongly associated with a higher risk and diagnostic delay [<xref ref-type="bibr" rid="B22">22</xref>–<xref ref-type="bibr" rid="B25">25</xref>]. García-Valencia et al. (2024) [<xref ref-type="bibr" rid="B26">26</xref>] highlight the substantial potential of advanced AI models, such as ChatGPT-3.5 and 4, in bridging language gaps within the healthcare sector. By delivering high-quality translations that ensure both accuracy and cultural sensitivity, these tools can significantly improve the accessibility of medical information, particularly for underserved, non-English-speaking communities [<xref ref-type="bibr" rid="B26">26</xref>].</p>
<p id="p-27">Differences in the content responses and readability between the two models may be attributable to the distinct algorithms employed by different LLM chatbots [<xref ref-type="bibr" rid="B27">27</xref>]. However, comprehensive information regarding the dataset and software methods utilized for both models has not been disclosed in detail, owing to trade secrets and confidentiality concerns. Consequently, there is insufficient data to clarify the differences between these two AI models. To enhance transparency and foster a better understanding of AI, it is crucial for leading developers to share more information on technical aspects [<xref ref-type="bibr" rid="B28">28</xref>].</p>
<p id="p-28">In recent years, the potential applications of LLMs in medicine, and more specifically in dentistry, have generated significant interest within the scientific community. However, their implementation in real-world settings remains limited [<xref ref-type="bibr" rid="B29">29</xref>]. LLM models could help patients understand their oral cancer diagnoses, treatment options, and self-care by providing easy-to-understand information, leading to more informed interactions with healthcare providers [<xref ref-type="bibr" rid="B30">30</xref>, <xref ref-type="bibr" rid="B31">31</xref>].</p>
<p id="p-29">One of the limitations of ChatGPT-4 and Gemini is their reliance on internet-based data, which could potentially lead to inaccuracies if the information sources are outdated or biased. To mitigate this issue, it is essential that these models undergo continuous updates using validated medical sources, with regular oversight from healthcare professionals to ensure the accuracy and reliability of the information provided. The LLMs provide general, but not personalized, medical guidance. They should be considered complementary tools and should not replace the information given to patients by medical professionals [<xref ref-type="bibr" rid="B32">32</xref>].</p>
<p id="p-30">On the other hand, these models can assist healthcare providers in identifying early signs of oral cancer and in making decisions in primary care settings, especially when professionals with expertise in oral medicine are not available. Indeed, LLMs can be used for triage by assessing symptom severity, helping prioritize urgent cases, and recommending appropriate actions, which is particularly useful for optimizing healthcare resources [<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>].</p>
<p id="p-31">From the perspective of healthcare professionals, AI systems can serve as a valuable complement and support, alleviating the workload on healthcare staff [<xref ref-type="bibr" rid="B35">35</xref>]. By leveraging these advanced technologies, healthcare providers can enhance the quality of patient care while simultaneously minimizing the need for unnecessary in-person interactions [<xref ref-type="bibr" rid="B36">36</xref>].</p>
<sec id="t4-1">
<title>Limitations</title>
<p id="p-32">A limitation of this study is that only questions and answers provided in the English language were evaluated. Considering that oral cancer is more prevalent in Asian-Pacific countries, the responses may not suit these non-English-speaking countries [<xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B38">38</xref>]. Additionally, the study was based on a relatively small set of 15 oral cancer-related questions, which may not fully capture the range of possible responses and scenarios that these models could encounter. While this was sufficient for a preliminary evaluation, we acknowledge that a larger set of questions would provide a more comprehensive assessment. Future studies should consider expanding the number of questions and including multiple languages to enhance the robustness and generalizability of the findings. As this was a cross-sectional study that evaluated specific versions of two LLMs, our analysis of the reliability and validity of the results was limited.</p>
<p id="p-33">A strength of the study is that, unlike previous studies, it compared responses from two of the most used LLMs in scientific literature, with responses evaluated by four different experts in oral cancer and in the evaluation of medical data on the internet. It should be noted that no universally accepted reference standards or quality criteria exist for this purpose.</p>
</sec>
<sec id="t4-2">
<title>Conclusions</title>
<p id="p-34">Although Gemini showed slightly superior performance in answering questions related to oral cancer, its responses were less readable. This suggests that its texts are less accessible to lay persons, which is crucial as content accessibility can significantly influence the understanding and usefulness of the information provided. LLMs are a new resource with undeniable potential that will eventually be implemented in clinical practice. This opportunity may require professionals to reconsider the doctor-patient relationship, the quality and accuracy of advice provided, and even the decision-making process.</p>
</sec>
</sec>
</body>
<back>
<glossary>
<title>Abbreviations</title>
<def-list>
<def-item>
<term>AI</term>
<def>
<p>artificial intelligence</p>
</def>
</def-item>
<def-item>
<term>FKGL</term>
<def>
<p>Flesch-Kincaid Grade Level</p>
</def>
</def-item>
<def-item>
<term>FRE</term>
<def>
<p>Flesch Reading Ease</p>
</def>
</def-item>
<def-item>
<term>LLMs</term>
<def>
<p>large language models</p>
</def>
</def-item>
<def-item>
<term>NCI</term>
<def>
<p>National Cancer Institute</p>
</def>
</def-item>
<def-item>
<term>SD</term>
<def>
<p>standard deviation</p>
</def>
</def-item>
</def-list>
</glossary>
<sec id="s5">
<title>Declarations</title>
<sec id="t-5-1">
<title>Acknowledgments</title>
<p>Declaration of generative AI and AI-assisted technologies in the writing process: During the preparation of this work the author(s) used ChatGPT in order to improve readability and language. After using this tool/service, the authors reviewed and edited the content as needed and took full responsibility for the content of the publication.</p>
</sec>
<sec id="t-5-2">
<title>Author contributions</title>
<p>MDF: Conceptualization, Investigation, Formal analysis, Writing—original draft, Writing—review &amp; editing. RMLP, ARSS, and SW: Investigation, Writing—review &amp; editing. PDD: Conceptualization, Investigation, Formal analysis, Writing—review &amp; editing. All authors read and approved the submitted version.</p>
</sec>
<sec id="t-5-3" sec-type="COI-statement">
<title>Conflicts of interest</title>
<p>Saman Warnakulasuriya, who is the Guest Editor of Exploration of Digital Health Technologies, had no involvement in the decision-making or the review process of this manuscript.</p>
</sec>
<sec id="t-5-4">
<title>Ethical approval</title>
<p>Not applicable.</p>
</sec>
<sec id="t-5-5">
<title>Consent to participate</title>
<p>Not applicable.</p>
</sec>
<sec id="t-5-6">
<title>Consent to publication</title>
<p>Not applicable.</p>
</sec>
<sec id="t-5-7" sec-type="data-availability">
<title>Availability of data and materials</title>
<p>The data are available upon request to the corresponding author.</p>
</sec>
<sec id="t-5-8">
<title>Funding</title>
<p>Not applicable.</p>
</sec>
<sec id="t-5-9">
<title>Copyright</title>
<p>© The Author(s) 2024.</p>
</sec>
</sec>
<ref-list>
<ref id="B1">
<label>1</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Patil</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Albogami</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Hosmani</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Mujoo</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kamil</surname>
<given-names>MA</given-names>
</name>
<name>
<surname>Mansour</surname>
<given-names>MA</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Artificial Intelligence in the Diagnosis of Oral Diseases: Applications and Pitfalls</article-title>
<source>Diagnostics (Basel)</source>
<year iso-8601-date="2022">2022</year>
<volume>12</volume>
<elocation-id>1029</elocation-id>
<pub-id pub-id-type="doi">10.3390/diagnostics12051029</pub-id>
<pub-id pub-id-type="pmid">35626185</pub-id>
<pub-id pub-id-type="pmcid">PMC9139975</pub-id>
</element-citation>
</ref>
<ref id="B2">
<label>2</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moulaei</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Yadegari</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Baharestani</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Farzanbakhsh</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sabet</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Afrash</surname>
<given-names>MR</given-names>
</name>
</person-group>
<article-title>Generative artificial intelligence in healthcare: A scoping review on benefits, challenges and applications</article-title>
<source>Int J Med Inform</source>
<year iso-8601-date="2024">2024</year>
<volume>188</volume>
<elocation-id>105474</elocation-id>
<pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105474</pub-id>
<pub-id pub-id-type="pmid">38733640</pub-id>
</element-citation>
</ref>
<ref id="B3">
<label>3</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Souza</surname>
<given-names>LL</given-names>
</name>
<name>
<surname>Lopes</surname>
<given-names>MA</given-names>
</name>
<name>
<surname>Santos-Silva</surname>
<given-names>AR</given-names>
</name>
<name>
<surname>Vargas</surname>
<given-names>PA</given-names>
</name>
</person-group>
<article-title>The potential of ChatGPT in oral medicine: a new era of patient care?</article-title>
<source>Oral Surg Oral Med Oral Pathol Oral Radiol</source>
<year iso-8601-date="2024">2024</year>
<volume>137</volume>
<fpage>1</fpage>
<lpage>2</lpage>
<pub-id pub-id-type="doi">10.1016/j.oooo.2023.09.010</pub-id>
<pub-id pub-id-type="pmid">37968192</pub-id>
</element-citation>
</ref>
<ref id="B4">
<label>4</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yeo</surname>
<given-names>YH</given-names>
</name>
<name>
<surname>Samaan</surname>
<given-names>JS</given-names>
</name>
<name>
<surname>Ng</surname>
<given-names>WH</given-names>
</name>
<name>
<surname>Ting</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Trivedi</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Vipani</surname>
<given-names>A</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Assessing the performance of ChatGPT in answering questions regarding cirrhosis and hepatocellular carcinoma</article-title>
<source>Clin Mol Hepatol</source>
<year iso-8601-date="2023">2023</year>
<volume>29</volume>
<fpage>721</fpage>
<lpage>32</lpage>
<pub-id pub-id-type="doi">10.3350/cmh.2023.0089</pub-id>
<pub-id pub-id-type="pmid">36946005</pub-id>
<pub-id pub-id-type="pmcid">PMC10366809</pub-id>
</element-citation>
</ref>
<ref id="B5">
<label>5</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hartzband</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Groopman</surname>
<given-names>J</given-names>
</name>
</person-group>
<article-title>Untangling the Web—patients, doctors, and the Internet</article-title>
<source>N Engl J Med</source>
<year iso-8601-date="2010">2010</year>
<volume>362</volume>
<fpage>1063</fpage>
<lpage>6</lpage>
<pub-id pub-id-type="doi">10.1056/NEJMp0911938</pub-id>
<pub-id pub-id-type="pmid">20335581</pub-id>
</element-citation>
</ref>
<ref id="B6">
<label>6</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cè</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Chiarpenello</surname>
<given-names>V</given-names>
</name>
<name>
<surname>Bubba</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Felisaz</surname>
<given-names>PF</given-names>
</name>
<name>
<surname>Oliva</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Irmici</surname>
<given-names>G</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Exploring the Role of ChatGPT in Oncology: Providing Information and Support for Cancer Patients</article-title>
<source>BioMedInformatics</source>
<year iso-8601-date="2024">2024</year>
<volume>4</volume>
<fpage>877</fpage>
<lpage>88</lpage>
<pub-id pub-id-type="doi">10.3390/biomedinformatics4020049</pub-id>
</element-citation>
</ref>
<ref id="B7">
<label>7</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmed</surname>
<given-names>SK</given-names>
</name>
</person-group>
<article-title>The future of oral cancer care: Integrating ChatGPT into clinical practice</article-title>
<source>Oral Oncol Rep</source>
<year iso-8601-date="2024">2024</year>
<volume>10</volume>
<elocation-id>100317</elocation-id>
<pub-id pub-id-type="doi">10.1016/j.oor.2024.100317</pub-id>
</element-citation>
</ref>
<ref id="B8">
<label>8</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Daraz</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Morrow</surname>
<given-names>AS</given-names>
</name>
<name>
<surname>Ponce</surname>
<given-names>OJ</given-names>
</name>
<name>
<surname>Beuschel</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Farah</surname>
<given-names>MH</given-names>
</name>
<name>
<surname>Katabi</surname>
<given-names>A</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Can Patients Trust Online Health Information? A Meta-narrative Systematic Review Addressing the Quality of Health Information on the Internet</article-title>
<source>J Gen Intern Med</source>
<year iso-8601-date="2019">2019</year>
<volume>34</volume>
<fpage>1884</fpage>
<lpage>91</lpage>
<pub-id pub-id-type="doi">10.1007/s11606-019-05109-0</pub-id>
<pub-id pub-id-type="pmid">31228051</pub-id>
<pub-id pub-id-type="pmcid">PMC6712138</pub-id>
</element-citation>
</ref>
<ref id="B9">
<label>9</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Souza</surname>
<given-names>LL</given-names>
</name>
<name>
<surname>Fonseca</surname>
<given-names>FP</given-names>
</name>
<name>
<surname>Martins</surname>
<given-names>MD</given-names>
</name>
<name>
<surname>de Almeida</surname>
<given-names>OP</given-names>
</name>
<name>
<surname>Pontes</surname>
<given-names>HAR</given-names>
</name>
<name>
<surname>Coracin</surname>
<given-names>FL</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>ChatGPT and medicine: A potential threat to science or a step towards the future?</article-title>
<source>J Med Artif Intell</source>
<year iso-8601-date="2023">2023</year>
<volume>6</volume>
<elocation-id>19</elocation-id>
<pub-id pub-id-type="doi">10.21037/jmai-23-70</pub-id>
</element-citation>
</ref>
<ref id="B10">
<label>10</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Johnson</surname>
<given-names>SB</given-names>
</name>
<name>
<surname>King</surname>
<given-names>AJ</given-names>
</name>
<name>
<surname>Warner</surname>
<given-names>EL</given-names>
</name>
<name>
<surname>Aneja</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kann</surname>
<given-names>BH</given-names>
</name>
<name>
<surname>Bylund</surname>
<given-names>CL</given-names>
</name>
</person-group>
<article-title>Using ChatGPT to evaluate cancer myths and misconceptions: artificial intelligence and cancer information</article-title>
<source>JNCI Cancer Spectr</source>
<year iso-8601-date="2023">2023</year>
<volume>7</volume>
<elocation-id>pkad015</elocation-id>
<pub-id pub-id-type="doi">10.1093/jncics/pkad015</pub-id>
<pub-id pub-id-type="pmid">36929393</pub-id>
<pub-id pub-id-type="pmcid">PMC10020140</pub-id>
</element-citation>
</ref>
<ref id="B11">
<label>11</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>López-Jornet</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Camacho-Alonso</surname>
<given-names>F</given-names>
</name>
</person-group>
<article-title>The quality of internet sites providing information relating to oral cancer</article-title>
<source>Oral Oncol</source>
<year iso-8601-date="2009">2009</year>
<volume>45</volume>
<fpage>e95</fpage>
<lpage>8</lpage>
<pub-id pub-id-type="doi">10.1016/j.oraloncology.2009.03.017</pub-id>
<pub-id pub-id-type="pmid">19457707</pub-id>
</element-citation>
</ref>
<ref id="B12">
<label>12</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Varela-Centelles</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Ledesma-Ludi</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Seoane-Romero</surname>
<given-names>JM</given-names>
</name>
<name>
<surname>Seoane</surname>
<given-names>J</given-names>
</name>
</person-group>
<article-title>Information about oral cancer on the Internet: our patients cannot understand it</article-title>
<source>Br J Oral Maxillofac Surg</source>
<year iso-8601-date="2015">2015</year>
<volume>53</volume>
<fpage>393</fpage>
<lpage>5</lpage>
<pub-id pub-id-type="doi">10.1016/j.bjoms.2015.01.020</pub-id>
<pub-id pub-id-type="pmid">25703184</pub-id>
</element-citation>
</ref>
<ref id="B13">
<label>13</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alcaide-Raya</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Hughes</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Warnakulasuriya</surname>
<given-names>S</given-names>
</name>
</person-group>
<article-title>How well informed are our Internet savvy patients on oral premalignant disorders?</article-title>
<source>Oral Surg</source>
<year iso-8601-date="2010">2010</year>
<volume>3</volume>
<fpage>120</fpage>
<lpage>8</lpage>
<pub-id pub-id-type="doi">10.1111/j.1752-248X.2010.01094.x</pub-id>
</element-citation>
</ref>
<ref id="B14">
<label>14</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hassona</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Alqaisi</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Al-Haddad</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Georgakopoulou</surname>
<given-names>EA</given-names>
</name>
<name>
<surname>Malamos</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Alrashdan</surname>
<given-names>MS</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>How good is ChatGPT at answering patients’ questions related to early detection of oral (mouth) cancer?</article-title>
<source>Oral Surg Oral Med Oral Pathol Oral Radiol</source>
<year iso-8601-date="2024">2024</year>
<volume>138</volume>
<fpage>269</fpage>
<lpage>78</lpage>
<pub-id pub-id-type="doi">10.1016/j.oooo.2024.04.010</pub-id>
<pub-id pub-id-type="pmid">38714483</pub-id>
</element-citation>
</ref>
<ref id="B15">
<label>15</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giuliani</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Papadakos</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Papadakos</surname>
<given-names>J</given-names>
</name>
</person-group>
<article-title>Propelling a New Era of Patient Education into Practice—Cancer Care Post-COVID-19</article-title>
<source>Int J Radiat Oncol Biol Phys</source>
<year iso-8601-date="2020">2020</year>
<volume>108</volume>
<fpage>404</fpage>
<lpage>6</lpage>
<pub-id pub-id-type="doi">10.1016/j.ijrobp.2020.05.036</pub-id>
<pub-id pub-id-type="pmid">32890521</pub-id>
<pub-id pub-id-type="pmcid">PMC7462896</pub-id>
</element-citation>
</ref>
<ref id="B16">
<label>16</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giannakopoulos</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Kavadella</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Salim</surname>
<given-names>AA</given-names>
</name>
<name>
<surname>Stamatopoulos</surname>
<given-names>V</given-names>
</name>
<name>
<surname>Kaklamanos</surname>
<given-names>EG</given-names>
</name>
</person-group>
<article-title>Evaluation of the Performance of Generative AI Large Language Models ChatGPT, Google Bard, and Microsoft Bing Chat in Supporting Evidence-Based Dentistry: Comparative Mixed Methods Study</article-title>
<source>J Med Internet Res</source>
<year iso-8601-date="2023">2023</year>
<volume>25</volume>
<elocation-id>e51580</elocation-id>
<pub-id pub-id-type="doi">10.2196/51580</pub-id>
<pub-id pub-id-type="pmid">38009003</pub-id>
<pub-id pub-id-type="pmcid">PMC10784979</pub-id>
</element-citation>
</ref>
<ref id="B17">
<label>17</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sabri</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Saleh</surname>
<given-names>MHA</given-names>
</name>
<name>
<surname>Hazrati</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Merchant</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Misch</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>PS</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Performance of three artificial intelligence (AI)-based large language models in standardized testing; implications for AI-assisted dental education</article-title>
<source>J Periodontal Res</source>
<year iso-8601-date="2024">2024</year>
<comment>[Epub ahead of print]</comment>
<pub-id pub-id-type="doi">10.1111/jre.13323</pub-id>
<pub-id pub-id-type="pmid">39030766</pub-id>
</element-citation>
</ref>
<ref id="B18">
<label>18</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thapa</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Adhikari</surname>
<given-names>S</given-names>
</name>
</person-group>
<article-title>ChatGPT, Bard, and Large Language Models for Biomedical Research: Opportunities and Pitfalls</article-title>
<source>Ann Biomed Eng</source>
<year iso-8601-date="2023">2023</year>
<volume>51</volume>
<fpage>2647</fpage>
<lpage>51</lpage>
<pub-id pub-id-type="doi">10.1007/s10439-023-03284-0</pub-id>
<pub-id pub-id-type="pmid">37328703</pub-id>
</element-citation>
</ref>
<ref id="B19">
<label>19</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Souza</surname>
<given-names>LL</given-names>
</name>
<name>
<surname>Santos-Silva</surname>
<given-names>AR</given-names>
</name>
<name>
<surname>Hagag</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Alzahem</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Vargas</surname>
<given-names>PA</given-names>
</name>
<name>
<surname>Lopes</surname>
<given-names>MA</given-names>
</name>
</person-group>
<article-title>Evaluating AI models in head and neck cancer research: the use of NCI data by ChatGPT 3.5, ChatGPT 4.0, Google Bard, and Bing Chat</article-title>
<source>Oral Surg Oral Med Oral Pathol Oral Radiol</source>
<year iso-8601-date="2024">2024</year>
<volume>138</volume>
<fpage>453</fpage>
<lpage>7</lpage>
<pub-id pub-id-type="doi">10.1016/j.oooo.2024.05.012</pub-id>
<pub-id pub-id-type="pmid">38910103</pub-id>
</element-citation>
</ref>
<ref id="B20">
<label>20</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dursun</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Bilici Geçer</surname>
<given-names>R</given-names>
</name>
</person-group>
<article-title>Can artificial intelligence models serve as patient information consultants in orthodontics?</article-title>
<source>BMC Med Inform Decis Mak</source>
<year iso-8601-date="2024">2024</year>
<volume>24</volume>
<elocation-id>211</elocation-id>
<pub-id pub-id-type="doi">10.1186/s12911-024-02619-8</pub-id>
<pub-id pub-id-type="pmid">39075513</pub-id>
<pub-id pub-id-type="pmcid">PMC11285120</pub-id>
</element-citation>
</ref>
<ref id="B21">
<label>21</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>López-Jornet</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Camacho-Alonso</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Miñano</surname>
<given-names>FM</given-names>
</name>
<name>
<surname>Sanchez-Siles</surname>
<given-names>M</given-names>
</name>
</person-group>
<article-title>Evaluation of the different strategies to oral cancer knowledge: a randomized controlled study</article-title>
<source>Psychooncology</source>
<year iso-8601-date="2013">2013</year>
<volume>22</volume>
<fpage>1618</fpage>
<lpage>23</lpage>
<pub-id pub-id-type="doi">10.1002/pon.3189</pub-id>
<pub-id pub-id-type="pmid">22962026</pub-id>
</element-citation>
</ref>
<ref id="B22">
<label>22</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Conway</surname>
<given-names>DI</given-names>
</name>
<name>
<surname>Petticrew</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Marlborough</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Berthiller</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Hashibe</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Macpherson</surname>
<given-names>LMD</given-names>
</name>
</person-group>
<article-title>Socioeconomic inequalities and oral cancer risk: a systematic review and meta-analysis of case-control studies</article-title>
<source>Int J Cancer</source>
<year iso-8601-date="2008">2008</year>
<volume>122</volume>
<fpage>2811</fpage>
<lpage>9</lpage>
<pub-id pub-id-type="doi">10.1002/ijc.23430</pub-id>
<pub-id pub-id-type="pmid">18351646</pub-id>
</element-citation>
</ref>
<ref id="B23">
<label>23</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Warnakulasuriya</surname>
<given-names>S</given-names>
</name>
</person-group>
<article-title>Significant oral cancer risk associated with low socioeconomic status</article-title>
<source>Evid Based Dent</source>
<year iso-8601-date="2009">2009</year>
<volume>10</volume>
<fpage>4</fpage>
<lpage>5</lpage>
<pub-id pub-id-type="doi">10.1038/sj.ebd.6400623</pub-id>
<pub-id pub-id-type="pmid">19322216</pub-id>
</element-citation>
</ref>
<ref id="B24">
<label>24</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dourado Martins</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Oliveira Mascarenhas Andrade</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Souza Freitas</surname>
<given-names>V</given-names>
</name>
<name>
<surname>de Araújo</surname>
<given-names>TM</given-names>
</name>
</person-group>
<article-title>Social determinants of health and the occurrence of oral cancer: a systematic literature review</article-title>
<source>Rev Salud Publica (Bogota)</source>
<year iso-8601-date="2014">2014</year>
<volume>16</volume>
<fpage>786</fpage>
<lpage>98</lpage>
<comment>Portuguese</comment>
<pub-id pub-id-type="pmid">26120762</pub-id>
</element-citation>
</ref>
<ref id="B25">
<label>25</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Swaminathan</surname>
<given-names>D</given-names>
</name>
<name>
<surname>George</surname>
<given-names>NA</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Iype</surname>
<given-names>EM</given-names>
</name>
</person-group>
<article-title>Factors associated with delay in diagnosis of oral cancers</article-title>
<source>Cancer Treat Res Commun</source>
<year iso-8601-date="2024">2024</year>
<volume>40</volume>
<elocation-id>100831</elocation-id>
<pub-id pub-id-type="doi">10.1016/j.ctarc.2024.100831</pub-id>
<pub-id pub-id-type="pmid">38996584</pub-id>
</element-citation>
</ref>
<ref id="B26">
<label>26</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>García-Valencia</surname>
<given-names>OA</given-names>
</name>
<name>
<surname>Thongprayoon</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Jadlowiec</surname>
<given-names>CC</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>SA</given-names>
</name>
<name>
<surname>Leeaphorn</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Budhiraja</surname>
<given-names>P</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>AI-driven translations for kidney transplant equity in Hispanic populations</article-title>
<source>Sci Rep</source>
<year iso-8601-date="2024">2024</year>
<volume>14</volume>
<elocation-id>8511</elocation-id>
<pub-id pub-id-type="doi">10.1038/s41598-024-59237-7</pub-id>
<pub-id pub-id-type="pmid">38609476</pub-id>
<pub-id pub-id-type="pmcid">PMC11014982</pub-id>
</element-citation>
</ref>
<ref id="B27">
<label>27</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dave</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Athaluri</surname>
<given-names>SA</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>S</given-names>
</name>
</person-group>
<article-title>ChatGPT in medicine: an overview of its applications, advantages, limitations, future prospects, and ethical considerations</article-title>
<source>Front Artif Intell</source>
<year iso-8601-date="2023">2023</year>
<volume>6</volume>
<elocation-id>1169595</elocation-id>
<pub-id pub-id-type="doi">10.3389/frai.2023.1169595</pub-id>
<pub-id pub-id-type="pmid">37215063</pub-id>
<pub-id pub-id-type="pmcid">PMC10192861</pub-id>
</element-citation>
</ref>
<ref id="B28">
<label>28</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Is</surname>
<given-names>EE</given-names>
</name>
<name>
<surname>Menekseoglu</surname>
<given-names>AK</given-names>
</name>
</person-group>
<article-title>Comparative performance of artificial intelligence models in rheumatology board-level questions: evaluating Google Gemini and ChatGPT-4o</article-title>
<source>Clin Rheumatol</source>
<year iso-8601-date="2024">2024</year>
<volume>43</volume>
<fpage>3507</fpage>
<lpage>13</lpage>
<pub-id pub-id-type="doi">10.1007/s10067-024-07154-5</pub-id>
<pub-id pub-id-type="pmid">39340572</pub-id>
</element-citation>
</ref>
<ref id="B29">
<label>29</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Andrew</surname>
<given-names>A</given-names>
</name>
</person-group>
<article-title>Potential applications and implications of large language models in primary care</article-title>
<source>Fam Med Community Health</source>
<year iso-8601-date="2024">2024</year>
<volume>12</volume>
<elocation-id>e002602</elocation-id>
<pub-id pub-id-type="doi">10.1136/fmch-2023-002602</pub-id>
<pub-id pub-id-type="pmid">38290759</pub-id>
<pub-id pub-id-type="pmcid">PMC10828839</pub-id>
</element-citation>
</ref>
<ref id="B30">
<label>30</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aydin</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Yildirim</surname>
<given-names>ÖT</given-names>
</name>
<name>
<surname>Aydin</surname>
<given-names>AH</given-names>
</name>
<name>
<surname>Murat</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Basaran</surname>
<given-names>CH</given-names>
</name>
</person-group>
<article-title>Comparison of artificial intelligence-assisted informed consent obtained before coronary angiography with the conventional method: Medical competence and ethical assessment</article-title>
<source>Digit Health</source>
<year iso-8601-date="2023">2023</year>
<volume>9</volume>
<elocation-id>20552076231218141</elocation-id>
<pub-id pub-id-type="doi">10.1177/20552076231218141</pub-id>
<pub-id pub-id-type="pmid">38047164</pub-id>
<pub-id pub-id-type="pmcid">PMC10693205</pub-id>
</element-citation>
</ref>
<ref id="B31">
<label>31</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kirchner</surname>
<given-names>GJ</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>RY</given-names>
</name>
<name>
<surname>Weddle</surname>
<given-names>JB</given-names>
</name>
<name>
<surname>Bible</surname>
<given-names>JE</given-names>
</name>
</person-group>
<article-title>Can Artificial Intelligence Improve the Readability of Patient Education Materials?</article-title>
<source>Clin Orthop Relat Res</source>
<year iso-8601-date="2023">2023</year>
<volume>481</volume>
<fpage>2260</fpage>
<lpage>7</lpage>
<pub-id pub-id-type="doi">10.1097/CORR.0000000000002668</pub-id>
<pub-id pub-id-type="pmid">37116006</pub-id>
<pub-id pub-id-type="pmcid">PMC10566892</pub-id>
</element-citation>
</ref>
<ref id="B32">
<label>32</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Diniz-Freitas</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Rivas-Mundiña</surname>
<given-names>B</given-names>
</name>
<name>
<surname>García-Iglesias</surname>
<given-names>JR</given-names>
</name>
<name>
<surname>García-Mato</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Diz-Dios</surname>
<given-names>P</given-names>
</name>
</person-group>
<article-title>How ChatGPT performs in Oral Medicine: The case of oral potentially malignant disorders</article-title>
<source>Oral Dis</source>
<year iso-8601-date="2024">2024</year>
<volume>30</volume>
<fpage>1912</fpage>
<lpage>8</lpage>
<pub-id pub-id-type="doi">10.1111/odi.14750</pub-id>
<pub-id pub-id-type="pmid">37794649</pub-id>
</element-citation>
</ref>
<ref id="B33">
<label>33</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Danesh</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Danesh</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Danesh</surname>
<given-names>F</given-names>
</name>
</person-group>
<article-title>Innovating dental diagnostics: ChatGPT’s accuracy on diagnostic challenges</article-title>
<source>Oral Dis</source>
<year iso-8601-date="2024">2024</year>
<comment>[Epub ahead of print]</comment>
<pub-id pub-id-type="doi">10.1111/odi.15082</pub-id>
<pub-id pub-id-type="pmid">39039720</pub-id>
</element-citation>
</ref>
<ref id="B34">
<label>34</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tomo</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Lechien</surname>
<given-names>JR</given-names>
</name>
<name>
<surname>Bueno</surname>
<given-names>HS</given-names>
</name>
<name>
<surname>Cantieri-Debortoli</surname>
<given-names>DF</given-names>
</name>
<name>
<surname>Simonato</surname>
<given-names>LE</given-names>
</name>
</person-group>
<article-title>Accuracy and consistency of ChatGPT-3.5 and -4 in providing differential diagnoses in oral and maxillofacial diseases: a comparative diagnostic performance analysis</article-title>
<source>Clin Oral Investig</source>
<year iso-8601-date="2024">2024</year>
<volume>28</volume>
<elocation-id>544</elocation-id>
<pub-id pub-id-type="doi">10.1007/s00784-024-05939-1</pub-id>
<pub-id pub-id-type="pmid">39316174</pub-id>
</element-citation>
</ref>
<ref id="B35">
<label>35</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bekbolatova</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Mayer</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Ong</surname>
<given-names>CW</given-names>
</name>
<name>
<surname>Toma</surname>
<given-names>M</given-names>
</name>
</person-group>
<article-title>Transformative Potential of AI in Healthcare: Definitions, Applications, and Navigating the Ethical Landscape and Public Perspectives</article-title>
<source>Healthcare (Basel)</source>
<year iso-8601-date="2024">2024</year>
<volume>12</volume>
<elocation-id>125</elocation-id>
<pub-id pub-id-type="doi">10.3390/healthcare12020125</pub-id>
<pub-id pub-id-type="pmid">38255014</pub-id>
<pub-id pub-id-type="pmcid">PMC10815906</pub-id>
</element-citation>
</ref>
<ref id="B36">
<label>36</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>TF</given-names>
</name>
<name>
<surname>Thirunavukarasu</surname>
<given-names>AJ</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Lim</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Poh</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Teo</surname>
<given-names>ZL</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Artificial intelligence and digital health in global eye health: opportunities and challenges</article-title>
<source>Lancet Glob Health</source>
<year iso-8601-date="2023">2023</year>
<volume>11</volume>
<fpage>e1432</fpage>
<lpage>43</lpage>
<pub-id pub-id-type="doi">10.1016/S2214-109X(23)00323-6</pub-id>
<pub-id pub-id-type="pmid">37591589</pub-id>
</element-citation>
</ref>
<ref id="B37">
<label>37</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Miranda-Filho</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Bray</surname>
<given-names>F</given-names>
</name>
</person-group>
<article-title>Global patterns and trends in cancers of the lip, tongue and mouth</article-title>
<source>Oral Oncol</source>
<year iso-8601-date="2020">2020</year>
<volume>102</volume>
<elocation-id>104551</elocation-id>
<pub-id pub-id-type="doi">10.1016/j.oraloncology.2019.104551</pub-id>
<pub-id pub-id-type="pmid">31986342</pub-id>
</element-citation>
</ref>
<ref id="B38">
<label>38</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Filho</surname>
<given-names>AM</given-names>
</name>
<name>
<surname>Warnakulasuriya</surname>
<given-names>S</given-names>
</name>
</person-group>
<article-title>Epidemiology of oral cancer in South and South-East Asia: Incidence and mortality</article-title>
<source>Oral Dis</source>
<year iso-8601-date="2024">2024</year>
<comment>[Epub ahead of print]</comment>
<pub-id pub-id-type="doi">10.1111/odi.14906</pub-id>
<pub-id pub-id-type="pmid">38454553</pub-id>
</element-citation>
</ref>
</ref-list>
</back>
</article>