﻿<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">Explor Med</journal-id>
<journal-id journal-id-type="publisher-id">EM</journal-id>
<journal-title-group>
<journal-title>Exploration of Medicine</journal-title>
</journal-title-group>
<issn pub-type="epub">2692-3106</issn>
<publisher>
<publisher-name>Open Exploration Publishing</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.37349/emed.2023.00153</article-id>
<article-id pub-id-type="manuscript">1001153</article-id>
<article-categories>
<subj-group>
<subject>Original Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Small patient datasets reveal genetic drivers of non-small cell lung cancer subtypes using machine learning for hypothesis generation</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-0945-4645</contrib-id>
<name>
<surname>Cook</surname>
<given-names>Moses</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing—original draft</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<xref ref-type="aff" rid="I1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-4984-7299</contrib-id>
<name>
<surname>Qorri</surname>
<given-names>Bessi</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing—original draft</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<xref ref-type="aff" rid="I2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Baskar</surname>
<given-names>Amruth</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/software/">Software</role>
<xref ref-type="aff" rid="I2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="I3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="I4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ziauddin</surname>
<given-names>Jalal</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/software/">Software</role>
<xref ref-type="aff" rid="I2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-9273-2839</contrib-id>
<name>
<surname>Pani</surname>
<given-names>Luca</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<xref ref-type="aff" rid="I4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="I5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="I6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yenkanchi</surname>
<given-names>Shashibushan</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<xref ref-type="aff" rid="I2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-0967-2164</contrib-id>
<name>
<surname>Geraci</surname>
<given-names>Joseph</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing—review &amp; editing</role>
<role content-type="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<xref ref-type="aff" rid="I2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="I7">
<sup>7</sup>
</xref>
<xref ref-type="aff" rid="I8">
<sup>8</sup>
</xref>
<xref ref-type="aff" rid="I9">
<sup>9</sup>
</xref>
<xref ref-type="corresp" rid="cor1">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="editor">
<name>
<surname>Farrer</surname>
<given-names>Lindsay A.</given-names>
</name>
<role>Academic Editor</role>
<aff>Boston University School of Medicine, USA</aff>
</contrib>
</contrib-group>
<aff id="I1">
<sup>1</sup>Department of Medical Biophysics, University of Toronto, Toronto, ON M5G 1L7, Canada</aff>
<aff id="I2">
<sup>2</sup>NetraMark, Toronto, ON M4P 2E5, Canada</aff>
<aff id="I3">
<sup>3</sup>Faculty of Mathematics, David R. Cheriton School of Computer Science, University of Waterloo, Waterloo, ON N2L 3G1, Canada</aff>
<aff id="I4">
<sup>4</sup>Department of Psychiatry and Behavioral Sciences, Leonard M. Miller School of Medicine, University of Miami, Coral Gables, FL 33124, USA</aff>
<aff id="I5">
<sup>5</sup>Department of Biomedical, Metabolic and Neural Sciences, University of Modena and Reggio Emilia, 41121 Modena, Italy</aff>
<aff id="I6">
<sup>6</sup>VeraSci, Durham, NC 27707, USA</aff>
<aff id="I7">
<sup>7</sup>Department of Pathology and Molecular Medicine, Queen’s University, Kingston, ON K7L 3N6, Canada</aff>
<aff id="I8">
<sup>8</sup>The Centre for Biotechnology and Genomics Medicine, Medical College of Georgia, Augusta University, Augusta, GA 30912, USA</aff>
<aff id="I9">
<sup>9</sup>The Clarke Center for Human Imagination, University of California San Diego, La Jolla, CA 92093-0021, USA</aff>
<author-notes>
<corresp id="cor1">
<sup>*</sup>
<bold>Correspondence:</bold> Joseph Geraci, Department of Pathology and Molecular Medicine, Queen’s University, Kingston ON, Canada. <email>joseph.geraci@netramark.com</email></corresp>
</author-notes>
<pub-date pub-type="ppub">
<year>2023</year>
</pub-date>
<pub-date pub-type="epub">
<day>26</day>
<month>07</month>
<year>2023</year>
</pub-date>
<volume>4</volume>
<issue>4</issue>
<fpage>428</fpage>
<lpage>440</lpage>
<history>
<date date-type="received">
<day>17</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>01</day>
<month>03</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>© The Author(s) 2023.</copyright-statement>
<license xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>This is an Open Access article licensed under a Creative Commons Attribution 4.0 International License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, sharing, adaptation, distribution and reproduction in any medium or format, for any purpose, even commercially, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Aim:</title>
<p>Many small datasets of significant value exist in the medical space that are being underutilized. Due to the heterogeneity of complex disorders found in oncology, systems capable of discovering patient subpopulations while elucidating etiologies are of great value as they can indicate leads for innovative drug discovery and development.</p>
</sec>
<sec>
<title>Methods:</title>
<p>Two small non-small cell lung cancer (NSCLC) datasets (GSE18842 and GSE10245) consisting of 58 samples of adenocarcinoma (ADC) and 45 samples of squamous cell carcinoma (SCC) were used in a machine intelligence framework to identify genetic biomarkers differentiating these two subtypes. Utilizing a set of standard machine learning (ML) methods, subpopulations of ADC and SCC were uncovered while simultaneously extracting which genes, in combination, were significantly involved in defining the subpopulations. A previously described interactive hypothesis-generating method designed to work with ML methods was employed to provide an alternative way of extracting the most important combination of variables to construct a new data set.</p>
</sec>
<sec>
<title>Results:</title>
<p>Several genes were uncovered that were previously implicated by other methods. This framework accurately discovered known subpopulations, such as genetic drivers associated with differing levels of aggressiveness within the SCC and ADC subtypes. Furthermore, phosphatidylinositol glycan anchor biosynthesis, class X (<italic>PIGX</italic>) was a novel gene implicated in this study that warrants further investigation due to its role in breast cancer proliferation.</p>
</sec>
<sec>
<title>Conclusions:</title>
<p>The ability to learn from small datasets was highlighted and revealed well-established properties of NSCLC. This showcases the utility of ML techniques to reveal potential genes of interest, even from small datasets, shedding light on novel driving factors behind subpopulations of patients.</p>
</sec>
</abstract>
<kwd-group>
<kwd>Artificial intelligence</kwd>
<kwd>small datasets</kwd>
<kwd>genetic subtypes</kwd>
<kwd>disease heterogeneity</kwd>
<kwd>squamous cell carcinoma</kwd>
<kwd>adenocarcinoma</kwd>
</kwd-group>
<funding-group>
<award-group id="award001">
<funding-source>
<institution>NetraMark Corp</institution>
</funding-source>
</award-group>
</funding-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p id="p-1">The collection of transcriptomic data is expensive, resulting in datasets with small sample sizes (in the hundreds) but thousands of variables. As a result, several techniques that are making significant strides in the imaging space, such as deep neural networks, are not suitable for these datasets, as they require a large number of samples. Furthermore, the heterogeneity of the patient population and the complexity of diseases found in oncology require going beyond the labels. The development of techniques that can explain the driving variables behind patient subpopulations is tremendously valuable in identifying and developing novel therapeutic agents—this is particularly relevant for mapping out heterogeneous diseases such as lung cancer.</p>
<p id="p-2">Lung cancer is the leading cause of cancer mortality worldwide, with non-small cell lung cancer (NSCLC) accounting for 85% of all lung cancers [<xref ref-type="bibr" rid="B1">1</xref>]. NSCLC can be divided into three histological subtypes with distinct phenotypes and prognoses: adenocarcinoma (ADC), squamous cell carcinoma (SCC), and large cell carcinoma (LCC) [<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B3">3</xref>]. The histological differences across these subtypes suggest that distinct molecular mechanisms underlie the observed phenotypic differences. Although the differential gene expressions across NSCLC subtypes have been of increasing interest, the therapeutic implications on how these pathways interact are only more recently being investigated [<xref ref-type="bibr" rid="B4">4</xref>]. The remarkable degree of genetic variability within each histological subtype further highlights the importance of molecular biology and genotyping for NSCLC [<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B6">6</xref>].</p>
<p id="p-3">Fortunately, machine learning (ML) advancements have served as promising tools for stratifying NSCLC, predicting transcriptional mutations based on histological slides, and discriminating NSCLC subtypes through genomic expression levels. The bulk of ML efforts has focused on image analysis for predicting the stage of NSCLC [<xref ref-type="bibr" rid="B7">7</xref>–<xref ref-type="bibr" rid="B10">10</xref>]. However, the growing body of evidence highlighting the molecular abnormalities that underlie the genomic subtypes of NSCLC can train ML algorithms to identify novel biomarkers for NSCLC, moving towards precision medicine [<xref ref-type="bibr" rid="B11">11</xref>–<xref ref-type="bibr" rid="B13">13</xref>]. For instance, previous reports have identified that ADC is associated with increased expression of genes related to protein transport and cell junctions, while SCC is associated with increased expression of genes related to cell division and DNA replication [<xref ref-type="bibr" rid="B14">14</xref>]. An analysis of gene expression profiles between ADC and SCC using ML has been previously reported, identifying several genes including cystatin-A (<italic>CSTA</italic>), tumor protein p63 (<italic>TP63</italic>), serpin family B member 13 (<italic>SERPINB13</italic>), chloride channel accessory 2 (<italic>CLCA2</italic>), bicaudal D cargo adaptor 2 (<italic>BICD2</italic>), P53 apoptosis effector related to PMP22 (<italic>PERP</italic>), FAT atypical cadherin 2 (<italic>FAT2</italic>), basonuclin 1 (<italic>BNC1</italic>), ATPase phospholipid transporting 11B (<italic>ATP11B</italic>), family with sequence similarity 83 member B (<italic>FAM83B</italic>), keratin 5 (<italic>KRT5</italic>), par-6 family cell polarity regulator gamma (<italic>PARD6G</italic>), and plakophilin 1 (<italic>PKP1</italic>) which were differentially expressed in ADC and SCC [<xref ref-type="bibr" rid="B15">15</xref>].</p>
<p id="p-4">Other computational methods for discriminating genetic drivers of NSCLC have been previously investigated. A k-means clustering method was used to classify genetic subtypes of ADC [<xref ref-type="bibr" rid="B16">16</xref>]. Healthy and ADC tissue was then classified using a support vector machine followed by input into a self-organizing map neural network. The neurons in the output layer were categorized using a hierarchical clustering method to divide ADC tumours into four genetic subtypes. Two subtypes were found to have high expression levels of immune-related genes, suggesting the existence of heterogeneous subpopulations of NSCLC. In another study, researchers also used hierarchical clustering of copy number variations to derive insights into NSCLC drug response [<xref ref-type="bibr" rid="B17">17</xref>].</p>
<p id="p-5">Several ML frameworks have been previously developed specifically tailored for small datasets. A one-shot learning approach called CancerSiamese has been used to predict cancer types while highlighting several marker genes to predict metastatic or primary tumour signatures [<xref ref-type="bibr" rid="B18">18</xref>]. A second ML approach has shown promise in deriving insights into immune cell populations in a rare disease application [<xref ref-type="bibr" rid="B19">19</xref>].</p>
<p id="p-6">In order to identify novel driving genes that distinguish these two broad subtypes, a combination of ML tools was designed to learn from patient datasets to analyze gene expression data derived from ADC and SCC NSCLC patients. Because large datasets are critical for most contemporary ML methods such as deep neural networks, there is a need for alternative techniques when data banks are insufficient to train the model. In addition, significant features found within small datasets may become diluted by more obvious statistical features and hence over-represented in large datasets. As such, ML methods must be carefully used and complemented by statistical methods that allow for the discovery of non-linear ways in which groups of genes may interact to drive disease heterogeneity. The methodology presented here is designed for small datasets—a novel way of hypothesizing genetic subpopulations that may result in pathogenesis. For example, the ML framework proposed here has been previously used on a small genetic dataset consisting of Alzheimer’s disease brain samples [<xref ref-type="bibr" rid="B20">20</xref>]. Several genetic pathways associated with Alzheimer’s disease were uncovered, suggesting that even with a small dataset, there exists a high degree of genetic complexity within pathophysiology. Similarly, the findings presented here support genes previously reported to distinguish ADC and SCC subtypes. However, the novelty of this work lies in the ability to discover previously unknown subpopulations that are defined by several genes at a time. These findings shed light on the different mechanisms at play within these subtypes.</p>
</sec>
<sec id="s2">
<title>Materials and methods</title>
<sec id="t2-1">
<title>Datasets</title>
<p id="p-7">Two data sets were used, consisting of 40 samples of ADC and 18 samples of SCC (GSE10245) [<xref ref-type="bibr" rid="B21">21</xref>] and 14 samples of ADC and 32 samples of SCC (GSE18842) [<xref ref-type="bibr" rid="B22">22</xref>] to obtain a total of 104 samples (<xref ref-type="table" rid="t1">Table 1</xref>). Only GSE10245 was used when analyzing gene expression levels for discriminating differences between sexes, as this information was omitted from GSE18842. Genetic expression levels denote Robust Multi-Array Average-calculated signal intensity [<xref ref-type="bibr" rid="B23">23</xref>].</p>
<table-wrap id="t1">
<label>Table 1</label>
<caption>
<p>Characteristics of datasets used to generate NSCLC hypotheses</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>
<bold>Dataset</bold>
</th>
<th>
<bold>ADC/SCC samples</bold>
</th>
<th>
<bold>Male/female samples</bold>
</th>
<th>
<bold>Reference</bold>
</th>
</tr>
</thead>
<tbody>
<tr>
<td>GSE10245</td>
<td>40/18</td>
<td>14/44</td>
<td>[<xref ref-type="bibr" rid="B21">21</xref>]</td>
</tr>
<tr>
<td>GSE18842</td>
<td>14/32</td>
<td>N/A</td>
<td>[<xref ref-type="bibr" rid="B22">22</xref>]</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="t2-2">
<title>Machine intelligence</title>
<p id="p-8">The methodology was developed to organize the resulting models from several well-known ML methods to explore NSCLC genetic heterogeneity within a small dataset. The only proprietary method used for these results is a novel feature selection tool that is part of the NetraAI system which incorporates systems biology [<xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>] and can help produce clustering diagrams as provided in this paper. This was used to create several reduced datasets with significantly fewer variables, e.g., less than a hundred. These reduced datasets are available upon request to encourage reproducibility and further research. The following algorithm was used based on standard methods to create models and insights (<xref ref-type="fig" rid="fig1">Figure 1</xref>). For the work reported in this paper, the following tailored process was utilized after performing the aforementioned feature reduction:</p>
<p id="p-9">
<list list-type="simple">
<list-item>
<label>(1)</label>
<p>First, variable importance was calculated via ensemble trees (Random Forest) through cross-validation [<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B27">27</xref>]. The dependent variables used were ADC <italic>vs.</italic> SCC.</p>
</list-item>
<list-item>
<label>(2)</label>
<p>Principal components were utilized as a linear unsupervised clustering method to reveal obvious subpopulation structures.</p>
</list-item>
<list-item>
<label>(3)</label>
<p>The loadings from the principal components were utilized to reduce the variables further.</p>
</list-item>
<list-item>
<label>(4)</label>
<p>Using the t-SNE [<xref ref-type="bibr" rid="B28">28</xref>], HDBSCAN [<xref ref-type="bibr" rid="B29">29</xref>], and UMAP [<xref ref-type="bibr" rid="B30">30</xref>] algorithms, subpopulations were extracted.</p>
</list-item>
<list-item>
<label>(5)</label>
<p>Sample IDs were collected from the clusters formed from these two clustering models; each group was then systematically compared with the others, and statistical methods were applied to determine differentially expressed gene candidates.</p>
</list-item>
</list>
</p>
<fig id="fig1" position="float">
<label>Figure 1</label>
<caption>
<p>ML approach for small datasets. Using two NSCLC datasets, a tailored ML approach was used consisting of feature selection with random forest, unsupervised clustering, cluster exploration with t-SNE, HDBSCAN, UMAP, and statistical analysis to obtain between-group differential gene expression for NSCLC patient stratification. These results were validated using the proprietary NetraAI which generates hypotheses across different groups of patients. t-SNE: t-distributed stochastic neighbor embedding; HDBSCAN: hierarchical density-based spatial clustering of applications with noise; UMAP: uniform manifold approximation and projection; ANOVA: analysis of variance</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="em-04-1001153-g001.tif" />
</fig>
<p id="p-10">Clustering was performed via principal components, t-SNE, HDBSCAN, and UMAP as these were the basis of the maps found in this paper. These methods were used to organize the resulting clustering models, in addition to the random forest models, such that the models were capable of being explored interactively to derive a deeper understanding of the driving genes behind the subclusters [<xref ref-type="bibr" rid="B20">20</xref>].</p>
<p id="p-11">A critical shortcoming of working with small data is that it is highly unlikely to represent the totality of the real-world phenomenon it represents, in this case, NSCLC. This means that creating reliable models that are meant to become biomarkers for the disorder is nearly impossible. However, what is possible is the discovery of a subpopulation of patients that all have a set of variables in common, in this case, gene expression. This cluster of patients can be regarded as a hypothesis and therefore classical statistics can be used to evaluate the significance of the findings. In this way, small data sets can be interrogated with the tailored process summarized in <xref ref-type="fig" rid="fig1">Figure 1</xref> in order to extract potentially meaningful discoveries.</p>
<p id="p-12">The methods here and those described in [<xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>] are designed to address how to extract clear insights about subgroups of patients and their driving variables, while innovative methods found in [<xref ref-type="bibr" rid="B31">31</xref>, <xref ref-type="bibr" rid="B32">32</xref>] are well suited to create models for predictions and decision making when sufficient data to do so is available.</p>
</sec>
<sec id="t2-3">
<title>Statistical analysis</title>
<p id="p-13">Statistical analyses were implemented in order to determine significant differences in gene expression data. The following represents a summary of the statistical methods employed:</p>
<p id="p-14">
<list list-type="simple">
<list-item>
<label>(1)</label>
<p>Bar plot values represent the mean expression level while error bars represent the standard deviation (SD) of the pooled data from each probe ID. Bar plot <italic>P</italic>-values were calculated using an unpaired <italic>t</italic>-test, where <italic>P</italic>-values &lt; 0.05 were considered statistically significant.</p>
</list-item>
<list-item>
<label>(2)</label>
<p>To determine the significance of a gene, a standard Student’s <italic>t</italic>-test was used when two subpopulations were compared, and if more than two subpopulations were compared, ANOVA was used. The resulting clusters were plotted for the purpose of illustrating the findings.</p>
</list-item>
<list-item>
<label>(3)</label>
<p>Bonferroni corrections were implemented whenever univariate statistics were utilized for feature selection and when initiating comparisons with ANOVA and <italic>t</italic>-tests.</p>
</list-item>
</list>
</p>
</sec>
</sec>
<sec id="s3">
<title>Results</title>
<sec id="t3-1">
<title>A tailored ML process identifies differentially expressed genes from a small NSCLC dataset</title>
<p id="p-15">Using the ADC and SCC tumour gene expression data, this tailored ML approach for small datasets was able to help generate a map distinguishing SCC (blue) and ADC (red) subjects, Loop 1 and Loop 2, respectively (<xref ref-type="fig" rid="fig2">Figure 2</xref>). The key genes that were found to have driven this distinction were desmocollin-3 (<italic>DSC3</italic>), visinin-like protein 1 (<italic>VSNL1</italic>), solute carrier family 6 member 10 (<italic>SLC6A10P</italic>), interferon regulatory transcription factor 6 (<italic>IRF6</italic>), dystonin (<italic>DST</italic>), <italic>CLCA2</italic>, desmoglein 3 (<italic>DSG3</italic>), lysophosphatidylcholine acetyltransferase 1 (<italic>LPCAT1</italic>), cingulin (<italic>CGN</italic>), and phosphatidylinositol glycan anchor biosynthesis, class X (<italic>PIGX</italic>). Of these, all genes except for <italic>LPCAT1</italic> were overexpressed in Loop 1, the SCC subjects. Meanwhile, Loop 2 consisting of ADC subjects was characterized by <italic>LPCAT1</italic> overexpression.</p>
<fig id="fig2" position="float">
<label>Figure 2</label>
<caption>
<p>Stratification of NSCLC patients into SCC and ADC using NetraAI. Loop 1 consisting of SCC (blue) subjects and Loop 2 consisting of ADC (red) subjects were delineated by HDBSCAN. These subpopulations were identified by clustering methods that stratified patients due to the statistically significant differential expression of <italic>DSC3</italic>, <italic>VSNL1</italic>, <italic>SLC6A10P</italic>, <italic>IRF6</italic>, <italic>DST</italic>, <italic>CLCA2</italic>, <italic>DSG3</italic>, <italic>LPCAT1</italic>, and <italic>PIGX</italic> between the two loops</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="em-04-1001153-g002.tif" />
</fig>
<p id="p-16">Collectively, in the analysis of these two datasets, a total of 10 genes were identified that discriminate ADC and SCC patient populations. It is worth mentioning that 9 of the 10 genes identified have been previously reported to be differentially expressed in SCC and ADC (<xref ref-type="table" rid="t2">Table 2</xref>), further validating the methods used here. The novel gene identified that has not been previously associated with NSCLC populations at the time of this report is <italic>PIGX</italic>.</p>
<table-wrap id="t2">
<label>Table 2</label>
<caption>
<p>Genes discriminating between SCC and ADC</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th>
<bold>Gene</bold>
</th>
<th>
<bold>Function</bold>
</th>
<th>
<bold>Upregulation (SCC/ADC)</bold>
</th>
<th>
<bold>Reference</bold>
</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<italic>DSC3</italic>
</td>
<td>Ca<sup>2+</sup>-dependent glycoprotein involved in cell adherence</td>
<td>SCC</td>
<td>[<xref ref-type="bibr" rid="B22">22</xref>]</td>
</tr>
<tr>
<td>
<italic>VSNL1</italic> </td>
<td>Neuronal Ca<sup>2+</sup> sensor protein; tumour suppressor gene</td>
<td>SCC</td>
<td>[<xref ref-type="bibr" rid="B33">33</xref>, <xref ref-type="bibr" rid="B34">34</xref>]</td>
</tr>
<tr>
<td>
<italic>IRF6</italic> </td>
<td>Transcription factor</td>
<td>SCC</td>
<td>[<xref ref-type="bibr" rid="B35">35</xref>]</td>
</tr>
<tr>
<td>
<italic>DST</italic> </td>
<td>Cell adhesion</td>
<td>SCC</td>
<td>[<xref ref-type="bibr" rid="B36">36</xref>]</td>
</tr>
<tr>
<td>
<italic>CLCA2</italic>
</td>
<td>Cell adhesion; tumour suppressor</td>
<td>SCC</td>
<td>[<xref ref-type="bibr" rid="B37">37</xref>]</td>
</tr>
<tr>
<td>
<italic>PIGX</italic> </td>
<td>Tumour suppressor</td>
<td>SCC</td>
<td>-</td>
</tr>
<tr>
<td>
<italic>DSG3</italic>
</td>
<td>Cell adhesion</td>
<td>SCC</td>
<td>[<xref ref-type="bibr" rid="B38">38</xref>–<xref ref-type="bibr" rid="B40">40</xref>]</td>
</tr>
<tr>
<td>
<italic>LPCAT1</italic> </td>
<td>Cancer progression and metastasis</td>
<td>ADC</td>
<td>[<xref ref-type="bibr" rid="B41">41</xref>, <xref ref-type="bibr" rid="B42">42</xref>]</td>
</tr>
<tr>
<td>
<italic>SLC6A10P</italic>
</td>
<td>Neurotransmitter transporter; *pseudogene of <italic>SLC6A8</italic></td>
<td>ADC</td>
<td>[<xref ref-type="bibr" rid="B43">43</xref>]</td>
</tr>
<tr>
<td>
<italic>CGN</italic>
</td>
<td>Tight junction</td>
<td>ADC</td>
<td>[<xref ref-type="bibr" rid="B21">21</xref>]</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>-: blank cell; Ca<sup>2+</sup>: calcium ion; *: pseudogene of its parent gene <italic>SLC6A8</italic></p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="t3-2">
<title>ADC and SCC are associated with distinct cellular adhesion molecules</title>
<p id="p-17">Reports of SCC being characterized by the upregulation of desmosome and gap junction genes and ADC characterized by the upregulation of tight junction genes suggest that NSCLC subtypes are associated with a distinct set of adhesion molecules [<xref ref-type="bibr" rid="B21">21</xref>]. Here, SCC was found to be associated with cell adhesion marker <italic>DSC3</italic>, and ADC was associated with tight junction marker <italic>CGN</italic> (<xref ref-type="fig" rid="fig3">Figure 3</xref>). Specifically, two probes corresponding to <italic>DSC3</italic> were identified, 206032_at and 206033_s_at. There was a statistically significant association of both <italic>DSC3</italic> probes with SCC (<italic>P</italic> &lt; 0.0001; <xref ref-type="fig" rid="fig3">Figure 3A</xref>). Interestingly, when looking at the dataset including sex, elevated expression of <italic>DSC3</italic> was associated with males; however, this was not statistically significant (<italic>P</italic> = 0.062 for 206032_at and <italic>P</italic> = 0.077 for 206033_s_at). In contrast, the two probes corresponding to <italic>CGN</italic>, 223232_s_at and 223233_s_at, were significantly associated with ADC (<italic>P</italic> &lt; 0.0001; <xref ref-type="fig" rid="fig3">Figure 3B</xref>). Moreover, the <italic>CGN</italic> probes were significantly associated with females (<italic>P</italic> = 0.014). These results highlight a potential role of sex-based differences in NSCLC that warrants further investigation.</p>
<fig id="fig3" position="float">
<label>Figure 3</label>
<caption>
<p>Differential expression of <italic>DSC3</italic> and <italic>CGN</italic> in SCC and ADC NSCLC patient subpopulations. (A) The expression levels of <italic>DSC3</italic> probes 206032_at and 206033_s_at (mean ± SD) in SCC and ADC subpopulations; (B) the expression levels of <italic>CGN</italic> probes 223232_s_at and 223233_s_at in SCC and ADC subpopulations</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="em-04-1001153-g003.tif" />
</fig>
</sec>
<sec id="t3-3">
<title>
<italic>SLC6A10P</italic> may be a key driver of an ADC subtype</title>
<p id="p-18">Further analysis of the two datasets revealed two distinct ADC patient subpopulations (<xref ref-type="fig" rid="fig4">Figure 4</xref>). These two loops were distinguished by <italic>SLC6A10P</italic>, with Loop 2 characterized by overexpression of <italic>SLC6A10P</italic> (<italic>P</italic> = 1.3 × 10<sup>–5</sup>). The association of <italic>SLC6A10P</italic> with ADC patients is in line with previous reports [<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B43">43</xref>]. However, increased expression of the pseudogene <italic>SLC6A10P</italic> in ADC has been associated with increased metastatic risk and reported to be a significant predictor of poor clinical outcome [<xref ref-type="bibr" rid="B43">43</xref>]. This suggests that within the ADC patient population there exist unique subpopulations that may be associated with increased aggressive and metastatic propensity.</p>
<fig id="fig4" position="float">
<label>Figure 4</label>
<caption>
<p>Semi-supervised clustering of ADC patient subpopulations using NetraAI. Analysis of the NSCLC patients revealed two distinct subpopulations of ADC (red) subjects delineated by HDBSCAN. Both Loop 1 and Loop 2 ADC subpopulations were identified by clustering methods that stratified patients due to statistically significant differential expression of <italic>SLC6A10P</italic> between the two Loops</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="em-04-1001153-g004.tif" />
</fig>
</sec>
<sec id="t3-4">
<title>
<italic>IRF6</italic> and <italic>CLCA2</italic> drive a unique SCC subpopulation</title>
<p id="p-19">Not surprisingly, further analysis also revealed two distinct subpopulations of SCC driven by <italic>IRF6</italic> and <italic>CLCA2</italic> (<xref ref-type="fig" rid="fig5">Figure 5A</xref>), which have been previously associated with SCC [<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B37">37</xref>]. <italic>IRF6</italic> and <italic>CLCA2</italic> expression levels were higher in SCC than in ADC (<italic>P</italic> &lt; 0.0001; <xref ref-type="fig" rid="fig5">Figure 5B</xref> and <xref ref-type="fig" rid="fig5">5C</xref>). Here, Loop 2 was associated with a significantly higher expression of both <italic>IRF6</italic> and <italic>CLCA2</italic> compared to Loop 1. The significance values for the <italic>CLCA2</italic> and <italic>IRF6</italic> probes between the two encircled SCC groups were evaluated to be 4.4 × 10<sup>–7</sup>, 5.8 × 10<sup>–3</sup>, 9.3 × 10<sup>–7</sup>, and 0.046 for the 206164_at, 206165_s_at, 206166_s_at and 1552477_a_at probes, respectively. The strong association of both genes with one specific subpopulation of SCC patients highlights an avenue of research focusing on the pathways at play in the etiology of the disease as well as for the identification of novel drugs targeting their combined pathways.</p>
<fig id="fig5" position="float">
<label>Figure 5</label>
<caption>
<p>Semi-supervised clustering of SCC patients and differential expression of <italic>IRF6</italic> and <italic>CLCA2</italic>. (A) Analysis of NSCLC patients revealed two distinct subpopulations of SCC (blue) subjects delineated by HDBSCAN within NetraAI. Both Loop 1 and Loop 2 subpopulations were identified by clustering methods that stratified patients due to statistically significant differential expression of <italic>IRF6</italic> and <italic>CLCA2</italic>; (B) the expression levels of <italic>IRF6</italic> probe 1552477_a_at (mean ± SD) in SCC and ADC patient subpopulations; (C) the expression levels of <italic>CLCA2</italic> probes 206164_at, 206165_s_at, and 206166_s_at (mean ± SD) in SCC and ADC patient subpopulations</p>
</caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="em-04-1001153-g005.tif" />
</fig>
</sec>
</sec>
<sec id="s4">
<title>Discussion</title>
<p id="p-20">Applying a suite of ML techniques appropriate for small datasets to publicly available NSCLC datasets yielded an excellent signal for separating ADC and SCC. The main philosophy followed here is that for smaller datasets, where the patients are unlikely to reflect the distribution of patients in the totality of reality, one can allow ML methods to generate hypotheses about the population available in a small dataset. This allows a researcher to benefit from the power of statistics, in that they can test the hypothesis and derive some measure of confidence. Proprietary methods like NetraAI empower this hypothesis testing paradigm, but the method described above is also capable of expressing hypotheses in the form of patient clusters.</p>
<p id="p-21">Here, patient clusters were compared using statistical methods suitable for a dataset with so few samples in order to avoid overfitting that often comes with utilizing contemporary ML methods with small datasets. Further, the transparency of the driving factors is important so that human experience can be used to evaluate what is being ‘discovered’ by the machine.</p>
<p id="p-22">This study highlights the genetic heterogeneity within NSCLC subtypes. Using this dataset, a set of 10 genes that distinguish ADC and SCC were identified (<xref ref-type="table" rid="t2">Table 2</xref>). Within these 10 genes, 9 have been previously reported to be associated either with NSCLC or a specific subtype of NSCLC, validating this ML approach. These findings were aligned with previous reports on SCC genes being associated with the organization and assembly of cell and gap junctions, glutathione conjugation and the redox stress response, ECM organization and collagen-related proteins, interferon and cytokine signaling, and HLA downregulation and ADC genes associated with ECM organization proteins and complement, interferon and cytokine signaling, and collagen-related genes and proteins for ECM organization [<xref ref-type="bibr" rid="B44">44</xref>]. Another study identified epidermis development, cell division, and epithelial cell differentiation as the most common categories characterizing SCC, and cell adhesion enrichment, biological adhesion, and coagulation for ADC [<xref ref-type="bibr" rid="B45">45</xref>]. However, some of the genes identified have not been previously associated with NSCLC or a specific subtype and represent areas that warrant greater investigation for the advancement of precision medicine in NSCLC.</p>
<p id="p-23">The first of the previously reported NSCLC-associated genes identified was <italic>DSC3</italic>, which plays a role in epidermal morphology and keratinocyte proliferation [<xref ref-type="bibr" rid="B22">22</xref>]. There are several studies that report on <italic>DSC3</italic> distinguishing ADC from SCC, with a higher expression in SCC [<xref ref-type="bibr" rid="B36">36</xref>, <xref ref-type="bibr" rid="B46">46</xref>–<xref ref-type="bibr" rid="B48">48</xref>]. Notably, there has been a report on the association between <italic>DSC3</italic> and tumour suppressor activity in NSCLC mediated by inhibition of <italic>EGFR</italic> [<xref ref-type="bibr" rid="B49">49</xref>]. However, there remain contradictory associations between <italic>DSC3</italic> and prognosis, with elevated levels associated with increased metastatic risk in melanoma and better prognosis in lung and colon cancer [<xref ref-type="bibr" rid="B40">40</xref>]. This suggests that the same protein may have differential effects in the tumour microenvironment (TME), which presents an interesting field of research to understand how <italic>DSC3</italic> expression correlates with NSCLC subtypes depending on where they originate in the lung. Reports of upregulation of desmosomes and gap junctions in SCC and tight junctions in ADC suggest that SCC and ADC are characterized by a distinct set of adhesion molecules [<xref ref-type="bibr" rid="B21">21</xref>].</p>
<p id="p-24">In the results presented here, ADC has been reported to be characterized by tight junctions and was identified by <italic>CGN</italic> and SCC has been characterized by gap junctions and was identified by <italic>DSC3</italic> (<xref ref-type="fig" rid="fig3">Figure 3</xref>). Males have been reported to have a significantly poorer NSCLC prognosis compared to females, shifting efforts towards sex-based approaches to diagnosis, prognosis, and therapeutic interventions [<xref ref-type="bibr" rid="B50">50</xref>, <xref ref-type="bibr" rid="B51">51</xref>]. Additionally, estrogens have been associated with an increased risk of ADC in women despite equal expression of estrogen receptors α and β; however, the role remains unclear [<xref ref-type="bibr" rid="B52">52</xref>]. While there are several reports on the sex-based differences in cancer mechanisms, including differences in metabolism, immunity, and angiogenesis, differences in <italic>CGN</italic> and <italic>DSC3</italic> expression have not been previously reported to the best of our knowledge [<xref ref-type="bibr" rid="B53">53</xref>]. Gap junction proteins, also known as connexins, serve as channels that connect the interior of adjacent cells, facilitating intracellular homeostasis and coordination of activities via second messengers [<xref ref-type="bibr" rid="B54">54</xref>]. Desmosomes primarily provide mechanical strength via a structural network. In contrast, tight junctions form a barrier around the cell, regulating the permeability of the paracellular space [<xref ref-type="bibr" rid="B55">55</xref>, <xref ref-type="bibr" rid="B56">56</xref>]. These molecules play critical roles in epithelial-to-mesenchymal transition, a process involved in cancer metastasis. Aside from the current work relating <italic>CGN</italic> expression to females, no sex-based differences have been previously reported. This presents a unique field of research, as there may be different druggable targets for males and females. 
The variability of adhesion molecule expression across sex warrants further investigation to elucidate the details of the correlation and advance toward gender-related precision medicine.</p>
<p id="p-25">Interestingly, <italic>SLC6A10P</italic> was the single gene that was found to distinguish between two specific subpopulations of ADC. <italic>SLC6A10P</italic> was previously found to be a marker for aggressive ADC [<xref ref-type="bibr" rid="B43">43</xref>], and recently, implicated within the Notch signaling pathway [<xref ref-type="bibr" rid="B57">57</xref>]. These findings suggest that <italic>SLC6A10P</italic> warrants further investigation as a genetic biomarker in the context of the ADC patient subpopulation. This demonstrates the power of machine intelligence to reveal etiologies within complex diseases, even when a small number of samples are present. However, the methods must be used to reveal subpopulations that can then be compared using appropriate statistical methods suitable for comparing small groups.</p>
<p id="p-26">With respect to the SCC patient population, <italic>CLCA2</italic> and <italic>IRF6</italic> were found to distinguish between two distinct SCC subpopulations. <italic>CLCA2</italic> has been reported to be highly expressed in SCC, suggesting that it may serve as a diagnostic marker to differentiate SCC from ADC. Female patients with <italic>CLCA2</italic>-negative SCC exhibited significantly poorer prognoses [<xref ref-type="bibr" rid="B37">37</xref>]. Furthermore, SCC expression was correlated with tumour grade upon histological characterization. In particular, <italic>CLCA2</italic>-negative samples were associated with poorly differentiated tumours [<xref ref-type="bibr" rid="B37">37</xref>].</p>
<p id="p-27">Most noteworthy, phosphatidylinositol glycan anchor biosynthesis class gene <italic>PIGX</italic>, was the only gene identified that has not been previously associated with NSCLC. However, there have been reports that <italic>PIGX</italic> promotes cancer cell proliferation by suppressing <italic>EHD2</italic> and <italic>ZIC1</italic> in breast cancer [<xref ref-type="bibr" rid="B58">58</xref>]. The authors reported that <italic>PIGX</italic> expression was associated with shorter recurrence-free survival. In the present study, <italic>PIGX</italic> was found to be a driver of ADC and SCC differentiation, being overexpressed in SCC patients (<xref ref-type="fig" rid="fig2">Figure 2</xref>). As a novel gene associated with NSCLC or a specific subtype, this highlights an area that warrants further investigation for the advancement of precision medicine in NSCLC.</p>
<p id="p-28">In order to create robust predictive models with machine intelligence, large datasets are required, but this study utilized the ability of some of these methods to create hypotheses instead, and then used methods appropriate for small data to test these hypotheses. This method uncovered several genetic subtypes of ADC and SCC, including those driven by <italic>SLC6A10P</italic>, <italic>CLCA2</italic>, and <italic>IRF6</italic>, respectively. Furthermore, these data suggest that the expression levels of adhesion proteins encoded by <italic>CGN</italic> and <italic>DSC3</italic> may play a role in sex-based differences in NSCLC. Finally, this study uncovered a statistically significant driver of NSCLC heterogeneity, <italic>PIGX</italic>, which warrants further investigation.</p>
<p id="p-29">This report highlights the use of a novel set of ML techniques that are appropriate for small datasets. The primary aim of using such techniques is to encourage other researchers to explore small datasets that are often otherwise skipped with ML as there may be hidden valuable information within them. Adopting these approaches, one can extract meaningful insights with the techniques described here to move closer toward precision medicine.</p>
</sec>
</body>
<back>
<glossary>
<title>Abbreviations</title>
<def-list>
<def-item>
<term>ADC</term>
<def>
<p>adenocarcinoma</p>
</def>
</def-item>
<def-item>
<term>ANOVA</term>
<def>
<p>analysis of variance</p>
</def>
</def-item>
<def-item>
<term>
<italic>CGN</italic>
</term>
<def>
<p>cingulin</p>
</def>
</def-item>
<def-item>
<term>
<italic>CLCA2</italic>
</term>
<def>
<p>chloride channel accessory 2</p>
</def>
</def-item>
<def-item>
<term>
<italic>DSC3</italic>
</term>
<def>
<p>desmocollin-3</p>
</def>
</def-item>
<def-item>
<term>
<italic>DSG3</italic>
</term>
<def>
<p>desmoglein 3</p>
</def>
</def-item>
<def-item>
<term>
<italic>DST</italic>
</term>
<def>
<p>dystonin</p>
</def>
</def-item>
<def-item>
<term>HDBSCAN</term>
<def>
<p>hierarchical density-based spatial clustering of applications with noise</p>
</def>
</def-item>
<def-item>
<term>
<italic>IRF6</italic>
</term>
<def>
<p>interferon regulatory transcription factor 6</p>
</def>
</def-item>
<def-item>
<term>
<italic>LPCAT1</italic>
</term>
<def>
<p>lysophosphatidylcholine acetyltransferase 1</p>
</def>
</def-item>
<def-item>
<term>ML</term>
<def>
<p>machine learning</p>
</def>
</def-item>
<def-item>
<term>NSCLC</term>
<def>
<p>non-small cell lung cancer</p>
</def>
</def-item>
<def-item>
<term>
<italic>PIGX</italic>
</term>
<def>
<p>phosphatidylinositol glycan anchor biosynthesis, class X</p>
</def>
</def-item>
<def-item>
<term>SCC</term>
<def>
<p>squamous cell carcinoma</p>
</def>
</def-item>
<def-item>
<term>SD</term>
<def>
<p>standard deviation</p>
</def>
</def-item>
<def-item>
<term>
<italic>SLC6A10P</italic>
</term>
<def>
<p>solute carrier family 6 member 10</p>
</def>
</def-item>
<def-item>
<term>t-SNE</term>
<def>
<p>t-distributed stochastic neighbor embedding</p>
</def>
</def-item>
<def-item>
<term>UMAP</term>
<def>
<p>uniform manifold approximation and projection</p>
</def>
</def-item>
<def-item>
<term>
<italic>VSNL1</italic>
</term>
<def>
<p>visinin-like protein 1</p>
</def>
</def-item>
</def-list>
</glossary>
<sec id="s8">
<title>Declarations</title>
<sec>
<title>Author contributions</title>
<p>JG: Conceptualization, Methodology, Resources, Writing—review &amp; editing, Supervision, Project administration, Funding acquisition. MC: Conceptualization, Visualization. MC and BQ: Methodology, Validation, Formal analysis, Investigation, Data curation, Writing—original draft, Writing—review &amp; editing. JZ, SY and AB: Software. SY: Validation. LP and JG: Writing—review &amp; editing. LP: Project administration.</p>
</sec>
<sec sec-type="COI-statement">
<title>Conflicts of interest</title>
<p>JG is a major shareholder of NetraMark Corp, where NetraMark is a technology company providing clinical trial support to pharmaceutical companies. LP has previously acted as a scientific consultant for AbbVie USA; Acadia USA; BCG Switzerland; Boehringer Ingelheim International GmbH; Compass Pathways; EDRA-Publishing, Italy; Ferrer Spain; Gedeon-Richter, Hungary; Inpeco SA, Switzerland; Johnson &amp; Johnson USA; NeuroCog Trials USA; Novartis-Gene Therapies, Switzerland; Otsuka USA; Pfizer Global USA; PharmaMar Spain; Relmada Therapeutics USA; Takeda, USA; VeraSci, USA; Vifor Switzerland.</p>
</sec>
<sec>
<title>Ethical approval</title>
<p>Not applicable.</p>
</sec>
<sec>
<title>Consent to participate</title>
<p>Not applicable.</p>
</sec>
<sec>
<title>Consent to publication</title>
<p>Not applicable.</p>
</sec>
<sec sec-type="data-availability">
<title>Availability of data and materials</title>
<p>Data was obtained from publicly available datasets GSE10245 <ext-link xlink:href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE10245" ext-link-type="uri">https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE10245</ext-link> and GSE18842 <ext-link xlink:href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE18842" ext-link-type="uri">https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE18842</ext-link>.</p>
</sec>
<sec>
<title>Funding</title>
<p>Part of this research was funded by NetraMark Corp in the form of salary for Dr. Joseph Geraci, and computational resources. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
</sec>
<sec>
<title>Copyright</title>
<p>© The Author(s) 2023.</p>
</sec>
</sec>
<ref-list>
<ref id="B1">
<label>1</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ridge</surname>
<given-names>CA</given-names>
</name>
<name>
<surname>McErlean</surname>
<given-names>AM</given-names>
</name>
<name>
<surname>Ginsberg</surname>
<given-names>MS</given-names>
</name>
</person-group>
<article-title>Epidemiology of lung cancer</article-title>
<source>Semin Intervent Radiol</source>
<year iso-8601-date="2013">2013</year>
<volume>30</volume>
<fpage>93</fpage>
<lpage>8</lpage>
<pub-id pub-id-type="pmid">27261907</pub-id></element-citation>
</ref>
<ref id="B2">
<label>2</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thomas</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>SV</given-names>
</name>
<name>
<surname>Subramaniam</surname>
<given-names>DS</given-names>
</name>
<name>
<surname>Giaccone</surname>
<given-names>G</given-names>
</name>
</person-group>
<article-title>Refining the treatment of NSCLC according to histological and molecular subtypes</article-title>
<source>Nat Rev Clin Oncol</source>
<year iso-8601-date="2015">2015</year>
<volume>12</volume>
<fpage>511</fpage>
<lpage>26</lpage>
<pub-id pub-id-type="doi">10.1038/nrclinonc.2015.90</pub-id><pub-id pub-id-type="pmid">25963091</pub-id></element-citation>
</ref>
<ref id="B3">
<label>3</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lawrence</surname>
<given-names>MS</given-names>
</name>
<name>
<surname>Stojanov</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Mermel</surname>
<given-names>CH</given-names>
</name>
<name>
<surname>Robinson</surname>
<given-names>JT</given-names>
</name>
<name>
<surname>Garraway</surname>
<given-names>LA</given-names>
</name>
<name>
<surname>Golub</surname>
<given-names>TR</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Discovery and saturation analysis of cancer genes across 21 tumour types</article-title>
<source>Nature</source>
<year iso-8601-date="2014">2014</year>
<volume>505</volume>
<fpage>495</fpage>
<lpage>501</lpage>
<pub-id pub-id-type="doi">10.1038/nature12912</pub-id><pub-id pub-id-type="pmid">24390350</pub-id><pub-id pub-id-type="pmcid">PMC4048962</pub-id></element-citation>
</ref>
<ref id="B4">
<label>4</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pikor</surname>
<given-names>LA</given-names>
</name>
<name>
<surname>Ramnarine</surname>
<given-names>VR</given-names>
</name>
<name>
<surname>Lam</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Lam</surname>
<given-names>WL</given-names>
</name>
</person-group>
<article-title>Genetic alterations defining NSCLC subtypes and their therapeutic implications</article-title>
<source>Lung Cancer</source>
<year iso-8601-date="2013">2013</year>
<volume>82</volume>
<fpage>179</fpage>
<lpage>89</lpage>
<pub-id pub-id-type="doi">10.1016/j.lungcan.2013.07.025</pub-id><pub-id pub-id-type="pmid">24011633</pub-id></element-citation>
</ref>
<ref id="B5">
<label>5</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manegold</surname>
<given-names>C</given-names>
</name>
</person-group>
<article-title>Treatment algorithm in 2014 for advanced non-small cell lung cancer: therapy selection by tumour histology and molecular biology</article-title>
<source>Adv Med Sci</source>
<year iso-8601-date="2014">2014</year>
<volume>59</volume>
<fpage>308</fpage>
<lpage>13</lpage>
<pub-id pub-id-type="doi">10.1016/j.advms.2014.08.008</pub-id><pub-id pub-id-type="pmid">25240504</pub-id></element-citation>
</ref>
<ref id="B6">
<label>6</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carnio</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Novello</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Bironzo</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Scagliotti</surname>
<given-names>GV</given-names>
</name>
</person-group>
<article-title>Moving from histological subtyping to molecular characterization: new treatment opportunities in advanced non-small-cell lung cancer</article-title>
<source>Expert Rev Anticancer Ther</source>
<year iso-8601-date="2014">2014</year>
<volume>14</volume>
<fpage>1495</fpage>
<lpage>513</lpage>
<pub-id pub-id-type="doi">10.1586/14737140.2014.949245</pub-id><pub-id pub-id-type="pmid">25183305</pub-id></element-citation>
</ref>
<ref id="B7">
<label>7</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>J</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Prediction of pathologic stage in non-small cell lung cancer using machine learning algorithm based on CT image feature analysis</article-title>
<source>BMC Cancer</source>
<year iso-8601-date="2019">2019</year>
<volume>19</volume>
<elocation-id>464</elocation-id>
<pub-id pub-id-type="doi">10.1186/s12885-019-5646-9</pub-id><pub-id pub-id-type="pmid">31101024</pub-id><pub-id pub-id-type="pmcid">PMC6525347</pub-id></element-citation>
</ref>
<ref id="B8">
<label>8</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tau</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Stundzia</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Yasufuku</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Hussey</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Metser</surname>
<given-names>U</given-names>
</name>
</person-group>
<article-title>Convolutional neural networks in predicting nodal and distant metastatic potential of newly diagnosed non-small cell lung cancer on FDG PET images</article-title>
<source>AJR Am J Roentgenol</source>
<year iso-8601-date="2020">2020</year>
<volume>215</volume>
<fpage>192</fpage>
<lpage>7</lpage>
<pub-id pub-id-type="doi">10.2214/AJR.19.22346</pub-id><pub-id pub-id-type="pmid">32348182</pub-id></element-citation>
</ref>
<ref id="B9">
<label>9</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kriegsmann</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Haag</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Weis</surname>
<given-names>CA</given-names>
</name>
<name>
<surname>Steinbuss</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Warth</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Zgorzelski</surname>
<given-names>C</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Deep learning for the classification of small-cell and non-small-cell lung cancer</article-title>
<source>Cancers (Basel)</source>
<year iso-8601-date="2020">2020</year>
<volume>12</volume>
<elocation-id>1604</elocation-id>
<pub-id pub-id-type="doi">10.3390/cancers12061604</pub-id><pub-id pub-id-type="pmid">32560475</pub-id><pub-id pub-id-type="pmcid">PMC7352768</pub-id></element-citation>
</ref>
<ref id="B10">
<label>10</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mu</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Gray</surname>
<given-names>JE</given-names>
</name>
<name>
<surname>Tunali</surname>
<given-names>I</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Non-invasive decision support for NSCLC treatment using PET/CT radiomics</article-title>
<source>Nat Commun</source>
<year iso-8601-date="2020">2020</year>
<volume>11</volume>
<elocation-id>5228</elocation-id>
<pub-id pub-id-type="doi">10.1038/s41467-020-19116-x</pub-id><pub-id pub-id-type="pmid">33067442</pub-id><pub-id pub-id-type="pmcid">PMC7567795</pub-id></element-citation>
</ref>
<ref id="B11">
<label>11</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rabbani</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Kanevsky</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Kafi</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Chandelier</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Giles</surname>
<given-names>FJ</given-names>
</name>
</person-group>
<article-title>Role of artificial intelligence in the care of patients with nonsmall cell lung cancer</article-title>
<source>Eur J Clin Invest</source>
<year iso-8601-date="2018">2018</year>
<volume>48</volume>
<elocation-id>e12901</elocation-id>
<pub-id pub-id-type="doi">10.1111/eci.12901</pub-id><pub-id pub-id-type="pmid">29405289</pub-id></element-citation>
</ref>
<ref id="B12">
<label>12</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lawrence</surname>
<given-names>MS</given-names>
</name>
<name>
<surname>Stojanov</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Polak</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Kryukov</surname>
<given-names>GV</given-names>
</name>
<name>
<surname>Cibulskis</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Sivachenko</surname>
<given-names>A</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Mutational heterogeneity in cancer and the search for new cancer-associated genes</article-title>
<source>Nature</source>
<year iso-8601-date="2013">2013</year>
<volume>499</volume>
<fpage>214</fpage>
<lpage>8</lpage>
<pub-id pub-id-type="doi">10.1038/nature12213</pub-id><pub-id pub-id-type="pmid">23770567</pub-id><pub-id pub-id-type="pmcid">PMC3919509</pub-id></element-citation>
</ref>
<ref id="B13">
<label>13</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Podolsky</surname>
<given-names>MD</given-names>
</name>
<name>
<surname>Barchuk</surname>
<given-names>AA</given-names>
</name>
<name>
<surname>Kuznetcov</surname>
<given-names>VI</given-names>
</name>
<name>
<surname>Gusarova</surname>
<given-names>NF</given-names>
</name>
<name>
<surname>Gaidukov</surname>
<given-names>VS</given-names>
</name>
<name>
<surname>Tarakanov</surname>
<given-names>SA</given-names>
</name>
</person-group>
<article-title>Evaluation of machine learning algorithm utilization for lung cancer classification based on gene expression levels</article-title>
<source>Asian Pac J Cancer Prev</source>
<year iso-8601-date="2016">2016</year>
<volume>17</volume>
<fpage>835</fpage>
<lpage>8</lpage>
<pub-id pub-id-type="doi">10.7314/apjcp.2016.17.2.835</pub-id><pub-id pub-id-type="pmid">26925688</pub-id></element-citation>
</ref>
<ref id="B14">
<label>14</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y</given-names>
</name>
</person-group>
<article-title>
<italic>In silico</italic> comparative genomic analysis of two non-small cell lung cancer subtypes and their potentials for cancer classification</article-title>
<source>Cancer Genomics Proteomics</source>
<year iso-8601-date="2014">2014</year>
<volume>11</volume>
<fpage>303</fpage>
<lpage>10</lpage>
<pub-id pub-id-type="pmid">25422361</pub-id></element-citation>
</ref>
<ref id="B15">
<label>15</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q</given-names>
</name>
</person-group>
<article-title>Analysis of gene expression profiles of lung cancer subtypes with machine learning algorithms</article-title>
<source>Biochim Biophys Acta Mol Basis Dis</source>
<year iso-8601-date="2020">2020</year>
<volume>1866</volume>
<elocation-id>165822</elocation-id>
<pub-id pub-id-type="doi">10.1016/j.bbadis.2020.165822</pub-id><pub-id pub-id-type="pmid">32360590</pub-id></element-citation>
</ref>
<ref id="B16">
<label>16</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>Q</given-names>
</name>
</person-group>
<article-title>Gene expression classification of lung adenocarcinoma into molecular subtypes</article-title>
<source>IEEE/ACM Trans Comput Biol Bioinform</source>
<year iso-8601-date="2020">2020</year>
<volume>17</volume>
<fpage>1187</fpage>
<lpage>97</lpage>
<pub-id pub-id-type="doi">10.1109/TCBB.2019.2905553</pub-id><pub-id pub-id-type="pmid">30892233</pub-id></element-citation>
</ref>
<ref id="B17">
<label>17</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yue</surname>
<given-names>Z</given-names>
</name>
</person-group>
<article-title>Pharmacogenomic cluster analysis of lung cancer cell lines provides insights into preclinical model selection in NSCLC</article-title>
<source>Interdiscip Sci</source>
<year iso-8601-date="2022">2022</year>
<volume>14</volume>
<fpage>712</fpage>
<lpage>21</lpage>
<pub-id pub-id-type="doi">10.1007/s12539-022-00517-z</pub-id><pub-id pub-id-type="pmid">35476185</pub-id></element-citation>
</ref>
<ref id="B18">
<label>18</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mostavi</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>YC</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y</given-names>
</name>
</person-group>
<article-title>CancerSiamese: one-shot learning for predicting primary and metastatic tumor types unseen during model training</article-title>
<source>BMC Bioinformatics</source>
<year iso-8601-date="2021">2021</year>
<volume>22</volume>
<elocation-id>244</elocation-id>
<pub-id pub-id-type="doi">10.1186/s12859-021-04157-w</pub-id><pub-id pub-id-type="pmid">33980137</pub-id><pub-id pub-id-type="pmcid">PMC8117642</pub-id></element-citation>
</ref>
<ref id="B19">
<label>19</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Robinson</surname>
<given-names>GA</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Dönnes</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Coelewij</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Naja</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Radziszewska</surname>
<given-names>A</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Disease-associated and patient-specific immune cell signatures in juvenile-onset systemic lupus erythematosus: patient stratification using a machine-learning approach</article-title>
<source>Lancet Rheumatol</source>
<year iso-8601-date="2020">2020</year>
<volume>2</volume>
<fpage>e485</fpage>
<lpage>96</lpage>
<pub-id pub-id-type="doi">10.1016/S2665-9913(20)30168-5</pub-id><pub-id pub-id-type="pmid">32818204</pub-id><pub-id pub-id-type="pmcid">PMC7425802</pub-id></element-citation>
</ref>
<ref id="B20">
<label>20</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qorri</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Tsay</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Au</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Geraci</surname>
<given-names>J</given-names>
</name>
</person-group>
<article-title>Using machine intelligence to uncover Alzheimer’s disease progression heterogeneity</article-title>
<source>Explor Med</source>
<year iso-8601-date="2020">2020</year>
<volume>1</volume>
<fpage>377</fpage>
<lpage>95</lpage>
<pub-id pub-id-type="doi">10.37349/emed.2020.00026</pub-id></element-citation>
</ref>
<ref id="B21">
<label>21</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kuner</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Muley</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Meister</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Ruschhaupt</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Buness</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>EC</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Global gene expression analysis reveals specific patterns of cell junctions in non-small cell lung cancer subtypes</article-title>
<source>Lung Cancer</source>
<year iso-8601-date="2009">2009</year>
<volume>63</volume>
<fpage>32</fpage>
<lpage>8</lpage>
<pub-id pub-id-type="doi">10.1016/j.lungcan.2008.03.033</pub-id><pub-id pub-id-type="pmid">18486272</pub-id></element-citation>
</ref>
<ref id="B22">
<label>22</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sanchez-Palencia</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Gomez-Morales</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Gomez-Capilla</surname>
<given-names>JA</given-names>
</name>
<name>
<surname>Pedraza</surname>
<given-names>V</given-names>
</name>
<name>
<surname>Boyero</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Rosell</surname>
<given-names>R</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Gene expression profiling reveals novel biomarkers in nonsmall cell lung cancer</article-title>
<source>Int J Cancer</source>
<year iso-8601-date="2011">2011</year>
<volume>129</volume>
<fpage>355</fpage>
<lpage>64</lpage>
<pub-id pub-id-type="doi">10.1002/ijc.25704</pub-id><pub-id pub-id-type="pmid">20878980</pub-id></element-citation>
</ref>
<ref id="B23">
<label>23</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Irizarry</surname>
<given-names>RA</given-names>
</name>
<name>
<surname>Gentleman</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Martinez-Murillo</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Spencer</surname>
<given-names>F</given-names>
</name>
</person-group>
<article-title>A model-based background adjustment for oligonucleotide expression arrays</article-title>
<source>J Am Stat Assoc</source>
<year iso-8601-date="2004">2004</year>
<volume>99</volume>
<fpage>909</fpage>
<lpage>17</lpage>
</element-citation>
</ref>
<ref id="B24">
<label>24</label>
<element-citation publication-type="web">
<person-group person-group-type="author">
<name>
<surname>Tsay</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Geraci</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>A</given-names>
</name>
</person-group>
<article-title>Next-gen AI for disease definition, patient stratification, and placebo effect</article-title>
<comment>OSF Preprints [Preprint]. 2020 [cited 2023 Jan 21]. Available from: <uri xlink:href="https://osf.io/pc7ak/">https://osf.io/pc7ak/</uri></comment>
</element-citation>
</ref>
<ref id="B25">
<label>25</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Choi</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Bodenstein</surname>
<given-names>DF</given-names>
</name>
<name>
<surname>Geraci</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Andreazza</surname>
<given-names>AC</given-names>
</name>
</person-group>
<article-title>Evaluation of postmortem microarray data in bipolar disorder using traditional data comparison and artificial intelligence reveals novel gene targets</article-title>
<source>J Psychiatr Res</source>
<year iso-8601-date="2021">2021</year>
<volume>142</volume>
<fpage>328</fpage>
<lpage>36</lpage>
<pub-id pub-id-type="doi">10.1016/j.jpsychires.2021.08.011</pub-id><pub-id pub-id-type="pmid">34419753</pub-id></element-citation>
</ref>
<ref id="B26">
<label>26</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lai</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Reinders</surname>
<given-names>MJ</given-names>
</name>
<name>
<surname>van’t Veer</surname>
<given-names>LJ</given-names>
</name>
<name>
<surname>Wessels</surname>
<given-names>LF</given-names>
</name>
</person-group>
<article-title>A comparison of univariate and multivariate gene selection techniques for classification of cancer datasets</article-title>
<source>BMC Bioinformatics</source>
<year iso-8601-date="2006">2006</year>
<volume>7</volume>
<elocation-id>235</elocation-id>
<pub-id pub-id-type="doi">10.1186/1471-2105-7-235</pub-id><pub-id pub-id-type="pmid">16670007</pub-id><pub-id pub-id-type="pmcid">PMC1569875</pub-id></element-citation>
</ref>
<ref id="B27">
<label>27</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Ishwaran</surname>
<given-names>H</given-names>
</name>
</person-group>
<article-title>Random forests for genomic data analysis</article-title>
<source>Genomics</source>
<year iso-8601-date="2012">2012</year>
<volume>99</volume>
<fpage>323</fpage>
<lpage>9</lpage>
<pub-id pub-id-type="doi">10.1016/j.ygeno.2012.04.003</pub-id><pub-id pub-id-type="pmid">22546560</pub-id><pub-id pub-id-type="pmcid">PMC3387489</pub-id></element-citation>
</ref>
<ref id="B28">
<label>28</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>van der Maaten</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G</given-names>
</name>
</person-group>
<article-title>Visualizing data using t-SNE</article-title>
<source>J Mach Learn Res</source>
<year iso-8601-date="2008">2008</year>
<volume>9</volume>
<fpage>2579</fpage>
<lpage>605</lpage>
</element-citation>
</ref>
<ref id="B29">
<label>29</label>
<element-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>McInnes</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Healy</surname>
<given-names>J</given-names>
</name>
</person-group>
<article-title>Accelerated hierarchical density based clustering</article-title>
<comment>In: 2017 IEEE International Conference on Data Mining Workshops (ICDMW). 2017 IEEE International Conference on Data Mining Workshops (ICDMW); 2017 Nov 18–21; New Orleans, LA, USA. IEEE; 2017. pp. 33–42.</comment>
<pub-id pub-id-type="doi">10.1109/ICDMW.2017.12</pub-id></element-citation>
</ref>
<ref id="B30">
<label>30</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>McInnes</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Healy</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Saul</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Großberger</surname>
<given-names>L</given-names>
</name>
</person-group>
<article-title>UMAP: uniform manifold approximation and projection</article-title>
<source>J Open Source Softw</source>
<year iso-8601-date="2018">2018</year>
<volume>3</volume>
<elocation-id>861</elocation-id>
<pub-id pub-id-type="doi">10.21105/joss.00861</pub-id></element-citation>
</ref>
<ref id="B31">
<label>31</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>Y</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Big medical data decision-making intelligent system exploiting fuzzy inference logic for prostate cancer in developing countries</article-title>
<source>IEEE Access</source>
<year iso-8601-date="2019">2019</year>
<volume>7</volume>
<fpage>2348</fpage>
<lpage>63</lpage>
<pub-id pub-id-type="doi">10.1109/ACCESS.2018.2886198</pub-id></element-citation>
</ref>
<ref id="B32">
<label>32</label>
<element-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Khushi</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Moni</surname>
<given-names>MA</given-names>
</name>
<name>
<surname>Uddin</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Poon</surname>
<given-names>SK</given-names>
</name>
</person-group>
<article-title>Lung cancer prediction using curriculum learning based deep neural networks</article-title>
<comment>In: 2021 IEEE International Conference on Digital Health (ICDH). 2021 IEEE International Conference on Digital Health (ICDH); 2021 Sep 5–10; Chicago, IL, USA. IEEE; 2021. pp. 11–8.</comment>
<pub-id pub-id-type="doi">10.1109/ICDH52753.2021.00013</pub-id></element-citation>
</ref>
<ref id="B33">
<label>33</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Fong</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Bellacosa</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Ross</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Apostolou</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Bassi</surname>
<given-names>DE</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title><italic>VILIP-1</italic> downregulation in non-small cell lung carcinomas: mechanisms and prediction of survival</article-title>
<source>PLoS One</source>
<year iso-8601-date="2008">2008</year>
<volume>3</volume>
<elocation-id>e1698</elocation-id>
<pub-id pub-id-type="doi">10.1371/journal.pone.0001698</pub-id><pub-id pub-id-type="pmid">18301774</pub-id><pub-id pub-id-type="pmcid">PMC2246032</pub-id></element-citation>
</ref>
<ref id="B34">
<label>34</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gonzalez Guerrico</surname>
<given-names>AM</given-names>
</name>
<name>
<surname>Jaffer</surname>
<given-names>ZM</given-names>
</name>
<name>
<surname>Page</surname>
<given-names>RE</given-names>
</name>
<name>
<surname>Braunewell</surname>
<given-names>KH</given-names>
</name>
<name>
<surname>Chernoff</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Klein-Szanto</surname>
<given-names>AJ</given-names>
</name>
</person-group>
<article-title>Visinin-like protein-1 is a potent inhibitor of cell adhesion and migration in squamous carcinoma cells</article-title>
<source>Oncogene</source>
<year iso-8601-date="2005">2005</year>
<volume>24</volume>
<fpage>2307</fpage>
<lpage>16</lpage>
<pub-id pub-id-type="doi">10.1038/sj.onc.1208476</pub-id><pub-id pub-id-type="pmid">15735716</pub-id></element-citation>
</ref>
<ref id="B35">
<label>35</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Qian</surname>
<given-names>B</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Interferon regulatory factor 6 correlates with the progression of non-small cell lung cancer and can be regulated by miR-320</article-title>
<source>J Pharm Pharmacol</source>
<year iso-8601-date="2021">2021</year>
<volume>73</volume>
<fpage>682</fpage>
<lpage>91</lpage>
<pub-id pub-id-type="doi">10.1093/jpp/rgab009</pub-id><pub-id pub-id-type="pmid">33772297</pub-id></element-citation>
</ref>
<ref id="B36">
<label>36</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chae</surname>
<given-names>YK</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>WM</given-names>
</name>
<name>
<surname>Bae</surname>
<given-names>WH</given-names>
</name>
<name>
<surname>Anker</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>AA</given-names>
</name>
<name>
<surname>Agte</surname>
<given-names>S</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Overexpression of adhesion molecules and barrier molecules is associated with differential infiltration of immune cells in non-small cell lung cancer</article-title>
<source>Sci Rep</source>
<year iso-8601-date="2018">2018</year>
<volume>8</volume>
<elocation-id>1023</elocation-id>
<pub-id pub-id-type="doi">10.1038/s41598-018-19454-3</pub-id><pub-id pub-id-type="pmid">29348685</pub-id><pub-id pub-id-type="pmcid">PMC5773521</pub-id></element-citation>
</ref>
<ref id="B37">
<label>37</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shinmura</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Igarashi</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Kato</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Kawanishi</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Inoue</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Nakamura</surname>
<given-names>S</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>CLCA2 as a novel immunohistochemical marker for differential diagnosis of squamous cell carcinoma from adenocarcinoma of the lung</article-title>
<source>Dis Markers</source>
<year iso-8601-date="2014">2014</year>
<volume>2014</volume>
<elocation-id>619273</elocation-id>
<pub-id pub-id-type="doi">10.1155/2014/619273</pub-id><pub-id pub-id-type="pmid">25548429</pub-id><pub-id pub-id-type="pmcid">PMC4274868</pub-id></element-citation>
</ref>
<ref id="B38">
<label>38</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Savci-Heijink</surname>
<given-names>CD</given-names>
</name>
<name>
<surname>Kosari</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Aubry</surname>
<given-names>MC</given-names>
</name>
<name>
<surname>Caron</surname>
<given-names>BL</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>P</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>The role of desmoglein-3 in the diagnosis of squamous cell carcinoma of the lung</article-title>
<source>Am J Pathol</source>
<year iso-8601-date="2009">2009</year>
<volume>174</volume>
<fpage>1629</fpage>
<lpage>37</lpage>
<pub-id pub-id-type="doi">10.2353/ajpath.2009.080778</pub-id><pub-id pub-id-type="pmid">19342368</pub-id><pub-id pub-id-type="pmcid">PMC2671252</pub-id></element-citation>
</ref>
<ref id="B39">
<label>39</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fukuoka</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Dracheva</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Shih</surname>
<given-names>JH</given-names>
</name>
<name>
<surname>Hewitt</surname>
<given-names>SM</given-names>
</name>
<name>
<surname>Fujii</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Kishor</surname>
<given-names>A</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Desmoglein 3 as a prognostic factor in lung cancer</article-title>
<source>Hum Pathol</source>
<year iso-8601-date="2007">2007</year>
<volume>38</volume>
<fpage>276</fpage>
<lpage>83</lpage>
<pub-id pub-id-type="doi">10.1016/j.humpath.2006.08.006</pub-id><pub-id pub-id-type="pmid">17084439</pub-id></element-citation>
</ref>
<ref id="B40">
<label>40</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Wo</surname>
<given-names>Y</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Desmoglein 3 and keratin 14 for distinguishing between lung adenocarcinoma and lung squamous cell carcinoma</article-title>
<source>Onco Targets Ther</source>
<year iso-8601-date="2020">2020</year>
<volume>13</volume>
<fpage>11111</fpage>
<lpage>24</lpage>
<pub-id pub-id-type="doi">10.2147/OTT.S270398</pub-id><pub-id pub-id-type="pmid">33149622</pub-id><pub-id pub-id-type="pmcid">PMC7605657</pub-id></element-citation>
</ref>
<ref id="B41">
<label>41</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Ni</surname>
<given-names>RJ</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>AG</given-names>
</name>
<name>
<surname>Bian</surname>
<given-names>K</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>A miR-205-LPCAT1 axis contributes to proliferation and progression in multiple cancers</article-title>
<source>Biochem Biophys Res Commun</source>
<year iso-8601-date="2020">2020</year>
<volume>527</volume>
<fpage>474</fpage>
<lpage>80</lpage>
<pub-id pub-id-type="doi">10.1016/j.bbrc.2020.04.071</pub-id><pub-id pub-id-type="pmid">32334831</pub-id></element-citation>
</ref>
<ref id="B42">
<label>42</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Tong</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>LPCAT1 promotes brain metastasis of lung adenocarcinoma by up-regulating PI3K/AKT/MYC pathway</article-title>
<source>J Exp Clin Cancer Res</source>
<year iso-8601-date="2019">2019</year>
<volume>38</volume>
<elocation-id>95</elocation-id>
<pub-id pub-id-type="doi">10.1186/s13046-019-1092-4</pub-id><pub-id pub-id-type="pmid">30791942</pub-id><pub-id pub-id-type="pmcid">PMC6385475</pub-id></element-citation>
</ref>
<ref id="B43">
<label>43</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>ZJ</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>WD</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>JQ</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
</person-group>
<article-title>High expression of SLC6A10P contributes to poor prognosis in lung adenocarcinoma</article-title>
<source>Int J Clin Exp Pathol</source>
<year iso-8601-date="2018">2018</year>
<volume>11</volume>
<fpage>720</fpage>
<lpage>6</lpage>
<pub-id pub-id-type="pmid">31938158</pub-id><pub-id pub-id-type="pmcid">PMC6958030</pub-id></element-citation>
</ref>
<ref id="B44">
<label>44</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lucchetta</surname>
<given-names>M</given-names>
</name>
<name>
<surname>da Piedade</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Mounir</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Vabistsevits</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Terkelsen</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Papaleo</surname>
<given-names>E</given-names>
</name>
</person-group>
<article-title>Distinct signatures of lung cancer types: aberrant mucin O-glycosylation and compromised immune response</article-title>
<source>BMC Cancer</source>
<year iso-8601-date="2019">2019</year>
<volume>19</volume>
<elocation-id>824</elocation-id>
<pub-id pub-id-type="doi">10.1186/s12885-019-5965-x</pub-id><pub-id pub-id-type="pmid">31429720</pub-id><pub-id pub-id-type="pmcid">PMC6702745</pub-id></element-citation>
</ref>
<ref id="B45">
<label>45</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>S</given-names>
</name>
</person-group>
<article-title>Identification of differentially-expressed genes between early-stage adenocarcinoma and squamous cell carcinoma lung cancer using meta-analysis methods</article-title>
<source>Oncol Lett</source>
<year iso-8601-date="2017">2017</year>
<volume>13</volume>
<fpage>3314</fpage>
<lpage>22</lpage>
<pub-id pub-id-type="doi">10.3892/ol.2017.5838</pub-id><pub-id pub-id-type="pmid">28521438</pub-id><pub-id pub-id-type="pmcid">PMC5431262</pub-id></element-citation>
</ref>
<ref id="B46">
<label>46</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Warth</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Muley</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Herpel</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Meister</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Herth</surname>
<given-names>FJ</given-names>
</name>
<name>
<surname>Schirmacher</surname>
<given-names>P</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Large-scale comparative analyses of immunomarkers for diagnostic subtyping of non-small-cell lung cancer biopsies</article-title>
<source>Histopathology</source>
<year iso-8601-date="2012">2012</year>
<volume>61</volume>
<fpage>1017</fpage>
<lpage>25</lpage>
<pub-id pub-id-type="doi">10.1111/j.1365-2559.2012.04308.x</pub-id><pub-id pub-id-type="pmid">22882703</pub-id></element-citation>
</ref>
<ref id="B47">
<label>47</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsuta</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Tanabe</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yoshida</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Takahashi</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Maeshima</surname>
<given-names>AM</given-names>
</name>
<name>
<surname>Asamura</surname>
<given-names>H</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Utility of 10 immunohistochemical markers including novel markers (desmocollin-3, glypican 3, S100A2, S100A7, and Sox-2) for differential diagnosis of squamous cell carcinoma from adenocarcinoma of the lung</article-title>
<source>J Thorac Oncol</source>
<year iso-8601-date="2011">2011</year>
<volume>6</volume>
<fpage>1190</fpage>
<lpage>9</lpage>
<pub-id pub-id-type="doi">10.1097/JTO.0b013e318219ac78</pub-id><pub-id pub-id-type="pmid">21623236</pub-id></element-citation>
</ref>
<ref id="B48">
<label>48</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Angulo</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Suarez-Gauthier</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Lopez-Rios</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Medina</surname>
<given-names>PP</given-names>
</name>
<name>
<surname>Conde</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>M</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Expression signatures in lung cancer reveal a profile for <italic>EGFR</italic>-mutant tumours and identify selective <italic>PIK3CA</italic> overexpression by gene amplification</article-title>
<source>J Pathol</source>
<year iso-8601-date="2008">2008</year>
<volume>214</volume>
<fpage>347</fpage>
<lpage>56</lpage>
<pub-id pub-id-type="doi">10.1002/path.2267</pub-id><pub-id pub-id-type="pmid">17992665</pub-id></element-citation>
</ref>
<ref id="B49">
<label>49</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Knösel</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Huber</surname>
<given-names>O</given-names>
</name>
<name>
<surname>Pacyna-Gengelbach</surname>
<given-names>M</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>The p53 target gene desmocollin 3 acts as a novel tumor suppressor through inhibiting EGFR/ERK pathway in human lung cancer</article-title>
<source>Carcinogenesis</source>
<year iso-8601-date="2012">2012</year>
<volume>33</volume>
<fpage>2326</fpage>
<lpage>33</lpage>
<pub-id pub-id-type="doi">10.1093/carcin/bgs273</pub-id><pub-id pub-id-type="pmid">22941060</pub-id></element-citation>
</ref>
<ref id="B50">
<label>50</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wainer</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>GM</given-names>
</name>
<name>
<surname>Gough</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Daniels</surname>
<given-names>MG</given-names>
</name>
<name>
<surname>Russell</surname>
<given-names>PA</given-names>
</name>
<name>
<surname>Choong</surname>
<given-names>P</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Sex-dependent staging in non-small-cell lung cancer; analysis of the effect of sex differences in the eighth edition of the tumor, node, metastases staging system</article-title>
<source>Clin Lung Cancer</source>
<year iso-8601-date="2018">2018</year>
<volume>19</volume>
<fpage>e933</fpage>
<lpage>44</lpage>
<pub-id pub-id-type="doi">10.1016/j.cllc.2018.08.004</pub-id><pub-id pub-id-type="pmid">30206043</pub-id></element-citation>
</ref>
<ref id="B51">
<label>51</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Radkiewicz</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Dickman</surname>
<given-names>PW</given-names>
</name>
<name>
<surname>Johansson</surname>
<given-names>ALV</given-names>
</name>
<name>
<surname>Wagenius</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Edgren</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Lambe</surname>
<given-names>M</given-names>
</name>
</person-group>
<article-title>Sex and survival in non-small cell lung cancer: a nationwide cohort study</article-title>
<source>PLoS One</source>
<year iso-8601-date="2019">2019</year>
<volume>14</volume>
<elocation-id>e0219206</elocation-id>
<pub-id pub-id-type="doi">10.1371/journal.pone.0219206</pub-id><pub-id pub-id-type="pmid">31247015</pub-id><pub-id pub-id-type="pmcid">PMC6597110</pub-id></element-citation>
</ref>
<ref id="B52">
<label>52</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ivanova</surname>
<given-names>MM</given-names>
</name>
<name>
<surname>Mazhawidza</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Dougherty</surname>
<given-names>SM</given-names>
</name>
<name>
<surname>Klinge</surname>
<given-names>CM</given-names>
</name>
</person-group>
<article-title>Sex differences in estrogen receptor subcellular location and activity in lung adenocarcinoma cells</article-title>
<source>Am J Respir Cell Mol Biol</source>
<year iso-8601-date="2010">2010</year>
<volume>42</volume>
<fpage>320</fpage>
<lpage>30</lpage>
<pub-id pub-id-type="doi">10.1165/rcmb.2009-0059OC</pub-id><pub-id pub-id-type="pmid">19556604</pub-id><pub-id pub-id-type="pmcid">PMC2830404</pub-id></element-citation>
</ref>
<ref id="B53">
<label>53</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rubin</surname>
<given-names>JB</given-names>
</name>
<name>
<surname>Lagas</surname>
<given-names>JS</given-names>
</name>
<name>
<surname>Broestl</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Sponagel</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Rockwell</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Rhee</surname>
<given-names>G</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Sex differences in cancer mechanisms</article-title>
<source>Biol Sex Differ</source>
<year iso-8601-date="2020">2020</year>
<volume>11</volume>
<elocation-id>17</elocation-id>
<pub-id pub-id-type="doi">10.1186/s13293-020-00291-x</pub-id><pub-id pub-id-type="pmid">32295632</pub-id><pub-id pub-id-type="pmcid">PMC7161126</pub-id></element-citation>
</ref>
<ref id="B54">
<label>54</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruch</surname>
<given-names>R</given-names>
</name>
</person-group>
<article-title>Gap junctions and connexins in cancer formation, progression, and therapy</article-title>
<source>Cancers (Basel)</source>
<year iso-8601-date="2020">2020</year>
<volume>12</volume>
<elocation-id>3307</elocation-id>
<pub-id pub-id-type="doi">10.3390/cancers12113307</pub-id><pub-id pub-id-type="pmid">33182480</pub-id><pub-id pub-id-type="pmcid">PMC7697820</pub-id></element-citation>
</ref>
<ref id="B55">
<label>55</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soini</surname>
<given-names>Y</given-names>
</name>
</person-group>
<article-title>Tight junctions in lung cancer and lung metastasis: a review</article-title>
<source>Int J Clin Exp Pathol</source>
<year iso-8601-date="2012">2012</year>
<volume>5</volume>
<fpage>126</fpage>
<lpage>36</lpage>
<pub-id pub-id-type="pmid">22400072</pub-id><pub-id pub-id-type="pmcid">PMC3294225</pub-id></element-citation>
</ref>
<ref id="B56">
<label>56</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhat</surname>
<given-names>AA</given-names>
</name>
<name>
<surname>Uppada</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Achkar</surname>
<given-names>IW</given-names>
</name>
<name>
<surname>Hashem</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Yadav</surname>
<given-names>SK</given-names>
</name>
<name>
<surname>Shanmugakonar</surname>
<given-names>M</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Tight junction proteins and signaling pathways in cancer and inflammation: a functional crosstalk</article-title>
<source>Front Physiol</source>
<year iso-8601-date="2019">2019</year>
<volume>9</volume>
<elocation-id>1942</elocation-id>
<pub-id pub-id-type="doi">10.3389/fphys.2018.01942</pub-id><pub-id pub-id-type="pmid">30728783</pub-id><pub-id pub-id-type="pmcid">PMC6351700</pub-id></element-citation>
</ref>
<ref id="B57">
<label>57</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>H</given-names>
</name>
</person-group>
<article-title><italic>SLC6A8</italic> is involved in the progression of non-small cell lung cancer through the Notch signaling pathway</article-title>
<source>Ann Transl Med</source>
<year iso-8601-date="2021">2021</year>
<volume>9</volume>
<elocation-id>264</elocation-id>
<comment>Erratum in: Ann Transl Med. 2022;10:845.</comment>
<pub-id pub-id-type="doi">10.21037/atm-20-5984</pub-id><pub-id pub-id-type="pmid">33708891</pub-id><pub-id pub-id-type="pmcid">PMC7940877</pub-id></element-citation>
</ref>
<ref id="B58">
<label>58</label>
<element-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nakakido</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Tamura</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Chung</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Ueda</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Fujii</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Kiyotani</surname>
<given-names>K</given-names>
</name>
<etal>et al.</etal>
</person-group>
<article-title>Phosphatidylinositol glycan anchor biosynthesis, class X containing complex promotes cancer cell proliferation through suppression of EHD2 and ZIC1, putative tumor suppressors</article-title>
<source>Int J Oncol</source>
<year iso-8601-date="2016">2016</year>
<volume>49</volume>
<fpage>868</fpage>
<lpage>76</lpage>
<pub-id pub-id-type="doi">10.3892/ijo.2016.3607</pub-id><pub-id pub-id-type="pmid">27572108</pub-id><pub-id pub-id-type="pmcid">PMC4948962</pub-id></element-citation>
</ref>
</ref-list>
</back>
</article>