diff --git a/docs/_sources/matminer.utils.rst.txt b/docs/_sources/matminer.utils.rst.txt index a32e2d0d9..97d3ee9ac 100644 --- a/docs/_sources/matminer.utils.rst.txt +++ b/docs/_sources/matminer.utils.rst.txt @@ -69,6 +69,14 @@ matminer.utils.utils module :undoc-members: :show-inheritance: +matminer.utils.warnings module +------------------------------ + +.. automodule:: matminer.utils.warnings + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/docs/_sources/matminer.utils.tests.rst.txt b/docs/_sources/matminer.utils.tests.rst.txt index 325f4675f..31e15ffcb 100644 --- a/docs/_sources/matminer.utils.tests.rst.txt +++ b/docs/_sources/matminer.utils.tests.rst.txt @@ -36,6 +36,14 @@ matminer.utils.tests.test\_io module :undoc-members: :show-inheritance: +matminer.utils.tests.test\_utils module +--------------------------------------- + +.. automodule:: matminer.utils.tests.test_utils + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/docs/_static/basic.css b/docs/_static/basic.css index 30fee9d0f..f316efcb4 100644 --- a/docs/_static/basic.css +++ b/docs/_static/basic.css @@ -4,7 +4,7 @@ * * Sphinx stylesheet -- basic theme. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/_static/doctools.js b/docs/_static/doctools.js index d06a71d75..4d67807d1 100644 --- a/docs/_static/doctools.js +++ b/docs/_static/doctools.js @@ -4,7 +4,7 @@ * * Base JavaScript utilities for all Sphinx HTML documentation. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index c5830ef94..97ac141b8 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: '0.9.2', + VERSION: '0.9.3', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/_static/language_data.js b/docs/_static/language_data.js index 250f5665f..367b8ed81 100644 --- a/docs/_static/language_data.js +++ b/docs/_static/language_data.js @@ -5,7 +5,7 @@ * This script contains the language-specific data used by searchtools.js, * namely the list of stopwords, stemmer, scorer and splitter. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ @@ -13,7 +13,7 @@ var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; -/* Non-minified version is copied as a separate JS file, is available */ +/* Non-minified version is copied as a separate JS file, if available */ /** * Porter Stemmer diff --git a/docs/_static/searchtools.js b/docs/_static/searchtools.js index 7918c3fab..b08d58c9b 100644 --- a/docs/_static/searchtools.js +++ b/docs/_static/searchtools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for the full-text search. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ @@ -99,7 +99,7 @@ const _displayItem = (item, searchTerms, highlightTerms) => { .then((data) => { if (data) listItem.appendChild( - Search.makeSearchSummary(data, searchTerms) + Search.makeSearchSummary(data, searchTerms, anchor) ); // highlight search terms in the summary if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js @@ -116,8 +116,8 @@ const _finishSearch = (resultCount) => { ); else Search.status.innerText = _( - `Search finished, found ${resultCount} page(s) matching the search query.` - ); + "Search finished, found ${resultCount} page(s) matching the search query." + ).replace('${resultCount}', resultCount); }; const _displayNextItem = ( results, @@ -137,6 +137,22 @@ const _displayNextItem = ( // search finished, update title and status message else _finishSearch(resultCount); }; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; /** * Default splitQuery function. Can be overridden in ``sphinx.search`` with a @@ -160,13 +176,26 @@ const Search = { _queued_query: null, _pulse_status: -1, - htmlToText: (htmlString) => { + htmlToText: (htmlString, anchor) => { const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); - htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); + for (const removalQuery of [".headerlink", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content const docContent = htmlElement.querySelector('[role="main"]'); - if (docContent !== undefined) return docContent.textContent; + if (docContent) return docContent.textContent; + console.warn( - "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template." + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." ); return ""; }, @@ -239,16 +268,7 @@ const Search = { else Search.deferQuery(query); }, - /** - * execute search (requires search index to be loaded) - */ - query: (query) => { - const filenames = Search._index.filenames; - const docNames = Search._index.docnames; - const titles = Search._index.titles; - const allTitles = Search._index.alltitles; - const indexEntries = Search._index.indexentries; - + _parseQuery: (query) => { // stem the search terms and add them to the correct list const stemmer = new Stemmer(); const searchTerms = new Set(); @@ -284,21 +304,38 @@ const Search = { // console.info("required: ", [...searchTerms]); // console.info("excluded: ", [...excludedTerms]); - // array of [docname, title, anchor, descr, score, filename] - let results = []; + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename]. + const normalResults = []; + const nonMainIndexResults = []; + _removeChildren(document.getElementById("search-progress")); - const queryLower = query.toLowerCase(); + const queryLower = query.toLowerCase().trim(); for (const [title, foundTitles] of Object.entries(allTitles)) { - if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { for (const [file, id] of foundTitles) { - let score = Math.round(100 * queryLower.length / title.length) - results.push([ + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles + normalResults.push([ docNames[file], titles[file] !== title ? `${titles[file]} > ${title}` : title, id !== null ? "#" + id : "", null, - score, + score + boost, filenames[file], ]); } @@ -308,46 +345,47 @@ const Search = { // search for explicit entries in index directives for (const [entry, foundEntries] of Object.entries(indexEntries)) { if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { - for (const [file, id] of foundEntries) { - let score = Math.round(100 * queryLower.length / entry.length) - results.push([ + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ docNames[file], titles[file], id ? "#" + id : "", null, score, filenames[file], - ]); + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } } } } // lookup as object objectTerms.forEach((term) => - results.push(...Search.performObjectSearch(term, objectTerms)) + normalResults.push(...Search.performObjectSearch(term, objectTerms)) ); // lookup as search terms in fulltext - results.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); // let the scorer override scores with a custom scoring function - if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item))); - - // now sort the results by score (in opposite order of appearance, since the - // display function below uses pop() to retrieve items) and then - // alphabetically - results.sort((a, b) => { - const leftScore = a[4]; - const rightScore = b[4]; - if (leftScore === rightScore) { - // same score: sort alphabetically - const leftTitle = a[1].toLowerCase(); - const rightTitle = b[1].toLowerCase(); - if (leftTitle === rightTitle) return 0; - return leftTitle > rightTitle ? -1 : 1; // inverted is intentional - } - return leftScore > rightScore ? 1 : -1; - }); + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. + normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. + // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; // remove duplicate search results // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept @@ -361,7 +399,12 @@ const Search = { return acc; }, []); - results = results.reverse(); + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); // for debugging //Search.lastresults = results.slice(); // a copy @@ -466,14 +509,18 @@ const Search = { // add support for partial matches if (word.length > 2) { const escapedWord = _escapeRegExp(word); - Object.keys(terms).forEach((term) => { - if (term.match(escapedWord) && !terms[word]) - arr.push({ files: terms[term], score: Scorer.partialTerm }); - }); - Object.keys(titleTerms).forEach((term) => { - if (term.match(escapedWord) && !titleTerms[word]) - arr.push({ files: titleTerms[word], score: Scorer.partialTitle }); - }); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } } // no match but word was a required one @@ -496,9 +543,8 @@ const Search = { // create the mapping files.forEach((file) => { - if (fileMap.has(file) && fileMap.get(file).indexOf(word) === -1) - fileMap.get(file).push(word); - else fileMap.set(file, [word]); + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); }); }); @@ -549,8 +595,8 @@ const Search = { * search summary for a given text. keywords is a list * of stemmed words. */ - makeSearchSummary: (htmlText, keywords) => { - const text = Search.htmlToText(htmlText); + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); if (text === "") return null; const textLower = text.toLowerCase(); diff --git a/docs/changelog.html b/docs/changelog.html index 0ace3dd39..4b0d1f43a 100644 --- a/docs/changelog.html +++ b/docs/changelog.html @@ -5,11 +5,11 @@ - MatMiner Changelog — matminer 0.9.2 documentation + MatMiner Changelog — matminer 0.9.3 documentation - - + + @@ -17,7 +17,7 @@ - - - - -_images/example_bulkmod_rf.png +_images/example_bulkmod_rf.png +

Step 6b: Plot the importance of the features we used

importances = rf.feature_importances_
 X_cols = np.asarray(X_cols)
@@ -300,7 +302,8 @@ 

Step 6: Plot our results and determine what features are the most important< pf.bar_chart(x=X_cols[indices], y=importances[indices])

-_images/example_bulkmod_feats.png +_images/example_bulkmod_feats.png + @@ -310,7 +313,7 @@

Step 6: Plot our results and determine what features are the most important< - - - - - - - -