Datasets

PubChem RDF

PubChem is an open chemistry database at the National Institutes of Health (NIH). For detailed information, see https://pubchemdocs.ncbi.nlm.nih.gov/about. Note: The endpoint does not include the files provided at ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF/compound/nbr2d/ and ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF/compound/nbr3d/.

Dataset specifications

Tags
Drug/Chemical Other biomolecule
Provenance Original
Registration Added by RDF portal
Data provider
  • National Center for Biotechnology Information
Creator
Issued 2025-05-28
Licenses
Download https://rdfportal.org/download/pubchem
SPARQL Endpoint https://rdfportal.org/pubchem/sparql

Dataset statistics

Triples
21959412691
Subjects
4377438077
Properties
80
Objects
5858310337
Classes
257102

SPARQL example queries

Example 1

Run on Endpoint
# FDA approved drugs whose TPSA (topological polar surface area) is below 120:
# report the trade name, compound, canonical SMILES, IUPAC InChI and TPSA.
# No ORDER BY is given, so which 100 distinct rows come back is up to the endpoint.

PREFIX sio: <http://semanticscience.org/resource/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX pubchemv: <http://rdf.ncbi.nlm.nih.gov/pubchem/vocabulary#>

SELECT DISTINCT ?tradename ?cid ?canonical_smiles ?iupac_inchi ?tpsa_value
WHERE {
  # Compounds that carry the FDA-approved-drug role.
  ?cid obo:RO_0000087 pubchemv:FDAApprovedDrugs .

  # TPSA descriptor of the compound, kept only when its value is under 120.
  ?cid sio:SIO_000008 ?tpsa_node .
  ?tpsa_node a sio:CHEMINF_000392 .
  ?tpsa_node sio:SIO_000300 ?tpsa_raw .
  FILTER(?tpsa_raw < 120.0)

  # Canonical SMILES descriptor of the compound.
  ?cid sio:SIO_000008 ?smiles_node .
  ?smiles_node a sio:CHEMINF_000376 .
  ?smiles_node sio:SIO_000300 ?canonical_smiles .

  # IUPAC InChI descriptor of the compound.
  ?cid sio:SIO_000008 ?inchi_node .
  ?inchi_node a sio:CHEMINF_000396 .
  ?inchi_node sio:SIO_000300 ?iupac_inchi .

  # The trade name hangs off a record linked to the compound via CHEMINF_000477
  # (presumably a PubChem substance), not off the compound itself.
  ?linked_record sio:CHEMINF_000477 ?cid .
  ?linked_record sio:SIO_000008 ?tradename_node .
  ?tradename_node a sio:CHEMINF_000561 .
  ?tradename_node sio:SIO_000300 ?tradename .

  # Expose the TPSA as a plain string in the result set.
  BIND(STR(?tpsa_raw) AS ?tpsa_value)
}
LIMIT 100

Example 2

Run on Endpoint
# Endpoint: https://rdfportal.org/pubchem/sparql
# Description: List pairs of a PubChem substance and its counterpart in PubChem Compound.
# The substance-to-compound link is a mandatory pattern, so the query yields no
# rows (rather than a single row of unbound variables) when nothing matches.
# No ORDER BY is given, so which 100 distinct pairs come back is up to the endpoint.
# NOTE(review): the link triples probably live in only one of the graphs listed
# below; confirm before trimming the FROM list.

PREFIX sio: <http://semanticscience.org/resource/>

SELECT DISTINCT ?PubChemSubstance ?substance_normalized_counterpart
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/compound>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/substance>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/descriptor/compound>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/descriptor/substance>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/synonym>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/inchikey>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/measuregroup>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/endpoint>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/bioassay>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/protein>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/pathway>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/conserveddomain>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/gene>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/reference>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/source>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/concept>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/patent>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/taxonomy>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/cell>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/cooccurrence>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/disease>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/book>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/journal>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/author>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/organization>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/grant>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/void>
WHERE {
    # CHEMINF_000477 links a substance to its counterpart compound.
    ?PubChemSubstance sio:CHEMINF_000477 ?substance_normalized_counterpart .
}
LIMIT 100

Example 3

Run on Endpoint
# Endpoint: https://rdfportal.org/pubchem/sparql
# Description: What are the top five diseases commonly mentioned with indomethacin (CID3715)? (cf. https://pubchem.ncbi.nlm.nih.gov/docs/rdf-use-cases#section=Case-11-What-are-the-top-five-diseases-commonly-mentioned-with-indomethacin-CID3715)
# Parameter: compound_disease_cooccurrence_subject: (example: compound:CID3715)
#
# Each result row is one co-occurrence record whose subject is the given compound;
# rows are ranked by the record's score and only the five highest are returned.
# The label is read from the co-occurrence object itself, so every label belongs
# to the disease in the same row.

PREFIX sio: <http://semanticscience.org/resource/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX compound: <http://rdf.ncbi.nlm.nih.gov/pubchem/compound/>

SELECT ?PubChemCompoundDiseaseCooccurrence ?compound_disease_cooccurrence_subject ?compound_disease_cooccurrence_object ?compound_disease_cooccurrence_score ?disease_pref_label
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/compound>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/substance>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/descriptor/compound>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/descriptor/substance>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/synonym>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/inchikey>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/measuregroup>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/endpoint>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/bioassay>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/protein>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/pathway>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/conserveddomain>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/gene>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/reference>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/source>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/concept>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/patent>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/taxonomy>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/cell>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/cooccurrence>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/disease>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/book>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/journal>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/author>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/organization>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/grant>
FROM <http://rdf.ncbi.nlm.nih.gov/pubchem/void>
WHERE {
    # Query parameter: the compound whose co-occurrences are ranked.
    VALUES ?compound_disease_cooccurrence_subject { compound:CID3715 }

    # Co-occurrence record: subject compound, object, and co-occurrence score.
    ?PubChemCompoundDiseaseCooccurrence a sio:SIO_000993 ;
        rdf:subject ?compound_disease_cooccurrence_subject ;
        rdf:object ?compound_disease_cooccurrence_object ;
        sio:SIO_000300 ?compound_disease_cooccurrence_score .

    # Preferred label of the co-occurring disease. Kept optional so a record
    # without a label is still ranked; it is tied to the object above so the
    # join cannot fan out across unrelated labels.
    OPTIONAL {
        ?compound_disease_cooccurrence_object skos:prefLabel ?disease_pref_label .
    }
}
ORDER BY DESC(?compound_disease_cooccurrence_score)
LIMIT 5

Schema diagram

Schema diagram for pubchem
Schema diagram for pubchem