Source code for gdsctools.regression_report

# -*- python -*-
# -*- coding: utf-8 -*-
#
#  This file is part of GDSCTools software
#
#  Copyright (c) 2015 - Wellcome Trust Sanger Institute
#  All rights reserved
#
#  File author(s): Thomas Cokelaer <cokelaer@gmail.com>
#
#  Distributed under the BSD 3-Clause License.
#  See accompanying file LICENSE.txt distributed with this software
#
#  website: http://github.com/CancerRxGene/gdsctools
#
##############################################################################
"""Code related to the Regression analysis to find associations between drug IC50s
and genomic features"""
import os
import glob
import json

from gdsctools.report import HTMLTable, ReportMain
from gdsctools.readers import DrugDecode
from gdsctools.volcano import ScatterJS

import colorlog as logger

import pandas as pd
import pylab

import easydev
from colormap import cmap_builder


__all__ = ['RegressionReport']


class RegressionReport(object):
    """Class used to interpret the results and create final HTML report

    On construction, the image directory is scanned for boxplot images
    named ``gdsctools_regression_boxplot_<drugid>.png``; the drug
    identifiers found that way are stored in :attr:`drugids` and one HTML
    page per drug can then be built with :meth:`create_html_drug`.
    """

    def __init__(self, method, directory=".", verbose=True,
                 image_dir="images", data_dir="data", config=None):
        """.. rubric:: Constructor

        :param method: Method used in the regression analysis (lasso,
            elasticnet, ridge)
        :param directory: directory passed to the main (summary) report
        :param verbose: if True, log a message when creating the main page
        :param image_dir: directory searched for the boxplot images
        :param data_dir: directory used to build the data files prefix
        :param config: dictionary of settings read by the HTML pages.
            Defaults to ``{"boxplot_n": "?"}``.
        """
        self.image_dir = image_dir
        self.method = method
        # A fresh dictionary per instance: a dict literal used as default
        # argument would be shared (and mutated) across all instances.
        self.config = {"boxplot_n": "?"} if config is None else config
        self.directory = directory
        self.output_dir = "."
        self.verbose = verbose

        self.prefix = "gdsctools_regression_"
        self.prefix_images = image_dir + os.sep + "gdsctools_regression_"
        self.prefix_data = data_dir + os.sep + "gdsctools_regression_"

        self.filenames = glob.glob(self.prefix_images + "boxplot_*png")
        self.drugids = [self._get_drugid(this) for this in self.filenames]

    def _get_drugid(self, filename):
        """Return the drug identifier encoded in a boxplot image filename"""
        # str.lstrip/rstrip remove any character of a *set*, not a
        # prefix/suffix, which may eat part of the identifier (e.g. when
        # the directory name contains digits). Slice explicitly instead.
        name = os.path.basename(filename)
        prefix = self.prefix + "boxplot_"
        if name.startswith(prefix):
            name = name[len(prefix):]
        # The glob pattern ends with "png" (no dot required)
        if name.endswith(".png"):
            name = name[:-len(".png")]
        elif name.endswith("png"):
            name = name[:-len("png")]
        return name

    def create_html_drug(self):
        """report for each individual drug"""
        for drugid in self.drugids:
            logger.info('Creating HTML report for drug %s' % drugid)
            report = HTMLOneDrug(drugid, caller=self)
            report.create_report(onweb=False)

    def create_html_main(self, onweb=False):
        """Create HTML main document (summary)

        :param onweb: forwarded to the ``create_report`` method of the
            main HTML page
        """
        if self.verbose:
            logger.info("Creating main HTML page in directory %s" %
                        (self.directory))
        ReportMain(directory=self.directory, verbose=self.verbose,
                   mode="summary")
        html = HTMLPageMain(caller=self)
        html.create_report(onweb=onweb)
class HTMLOneDrug(ReportMain):
    """HTML page reporting the regression results of a single drug

    The results are read from the JSON file
    ``<prefix_data>results_<drugid>.json`` where ``prefix_data`` is taken
    from the *caller*.
    """

    def __init__(self, drugid, caller):
        """.. rubric:: Constructor

        :param drugid: drug identifier (converted to an integer)
        :param caller: object providing the ``output_dir``, ``prefix_data``,
            ``prefix_images``, ``method`` and ``config`` attributes
            (e.g. a :class:`RegressionReport` instance)
        """
        self.drug = int(drugid)
        self.caller = caller
        filename = "drug_{0}.html".format(self.drug)
        super(HTMLOneDrug, self).__init__(
            directory=caller.output_dir,
            filename=filename,
            template_filename='regression.html',
            init_report=False)
        self.title = 'Single Drug analysis (%s)' % self.drug
        self.params = {"drugid": self.drug}

        results_filename = "%sresults_%s.json" % (self.caller.prefix_data,
                                                  self.drug)
        with open(results_filename, "r") as fh:
            data = json.load(fh)

        # Rounding is best-effort: a missing or non-numeric entry is left
        # untouched, but unrelated errors are no longer silently swallowed.
        for key, digits in (("bayes", 3), ("alpha", 5), ("Rp", 4)):
            try:
                data[key] = easydev.precision(data[key], digits)
            except (KeyError, TypeError, ValueError, OverflowError):
                logger.debug("Could not round '%s' in %s" %
                             (key, results_filename))

        self.params.update(data)
        self.params['method'] = self.caller.method

        self.jinja['sections'] = []
        self.jinja['goback'] = True

    def _create_report(self, onweb=True):
        """Fill the jinja sections: a summary followed by one section per
        image (boxplot, randomness, importance, weights) found on disk.

        :param onweb: unused here; kept for interface compatibility
        """
        section = """<div>
        <b>DrugID:</b> %(drugid)s</br>
        <b>Regression method:</b> %(method)s </br>
        <b>Regression, alpha parameter used:</b> %(alpha)s</br>
        <b>Bayes factor:</b> %(bayes)s</br>
        <b>Coefficient of regression (pearson):</b> %(Rp)s</br>
        </div>
        """ % self.params
        self.jinja['sections'].append(section)

        # The caller's default config only defines "boxplot_n"; fall back
        # on "?" for absent settings instead of raising a KeyError.
        config = self.caller.config

        text = {}
        text['boxplot'] = ("This boxplot shows the %s most important features "
                           "(based on the weights of the regression).")
        text['boxplot'] %= config.get("boxplot_n", "?")
        text['importance'] = ("Feature with non-null weights. If empty, it"
                              " means no feature of interests were found")
        text['randomness'] = (
            "Here we run the regression analysis %s times and "
            "plot the regression value (x-axis) for the real data (blue) "
            "and randomising the variable to explain (red). ")
        text['randomness'] %= config.get('randomness', "?")
        text['weights'] = ("Feature with non-null weights. If empty, it"
                           " means no feature of interests were found")

        for this in ["boxplot", "randomness", "importance", "weights"]:
            self.params['name'] = this
            self.params['text'] = text[this]
            filename = self.caller.prefix_images
            filename += "%(name)s_%(drugid)s.png" % self.params
            self.params["filename"] = filename
            self.params['title'] = this.title()

            if os.path.exists(filename):
                # The closing </div> was missing, leaving sections nested.
                section = """<div>
                <h2>%(title)s results</h2>
                <p>%(text)s</p>
                <img src="%(filename)s">
                </div>
                """ % self.params
                self.jinja['sections'].append(section)
            else:
                logger.warning("%s not found. Skipped" % filename)


class HTMLPageMain(ReportMain):
    """Main HTML page (index.html) summarising the results of all drugs

    The summary is built from the CSV file ``<prefix_data>results.csv``
    where ``prefix_data`` is taken from the *caller*.
    """

    def __init__(self, caller):
        """.. rubric:: Constructor

        :param caller: object providing the ``directory``, ``prefix_data``
            and ``method`` attributes (e.g. a :class:`RegressionReport`
            instance)
        """
        super(HTMLPageMain, self).__init__(
            directory=caller.directory,
            template_filename="regression.html",
            filename="index.html",
            mode="summary")
        self.caller = caller
        self.jinja['analysis_domain'] = "PANCAN"
        self.jinja['title'] = "Regression analysis summary"
        self.jinja['sections'] = []

    def _create_report(self, onweb=True):
        """Fill the jinja sections: method used, table of results with
        links to each drug page, and the canvas of the scatter plot.

        :param onweb: unused here; kept for interface compatibility
        """
        # The top section with standard information
        section = """<div>
        <b>Regression method:</b> %s </br>
        </div><hr>
        """ % self.caller.method
        self.jinja['sections'].append(section)

        # The main CSV tables with bayes factor and links to each drug ID
        filename = self.caller.prefix_data + "results.csv"
        df = pd.read_csv(filename)
        df['ttest (-log10)'] = -pylab.log10(df['ttest'])

        # NOTE(review): the original comment here read "prevents inf to
        # fail in the HTMLTable" but no infinite value is filtered; a
        # ttest of 0 yields inf in the column above -- TODO confirm.
        table = HTMLTable(df)
        table.add_bgcolor('bayes')
        table.add_bgcolor('Rp')
        table.df['drugid'] = ['<a href="drug_%s.html">%s</a>' % (x, x)
                              for x in table.df['drugid']]

        html = ("<div><p>This table contains links to all drugs (first column)."
                " The Rp column contains the coefficient of correlation"
                " (pearson) found with the regression method for the alpha"
                " parameter provided in column 3. The alpha value is the optimised"
                " value obtained using a cross validation (see below)."
                " The ln_alpha column is just the -log10(alpha) value. The bayes"
                " factor gives an idea of the significance of the correlation as "
                " compared to a null distribution. See"
                ' <a href="http://gdsctools.readthedocs.io/en/master/references.html">'
                'gdstools documentation.</a> for details.'
                "<br>"
                " Note also that the optimisation of the alpha parameter is"
                " performed using a cross validation and depends on a few"
                " parameters such as the range of alpha values, number of "
                " cross validation, ....</p>")
        html += table.to_html(index=False)
        # Close the <div> opened at the start of the description; the
        # wrapper below only closes its own <div>.
        html += "</div>"

        pattern = '<div>%s <p>Download the CSV <a href="%s">file</a></p></div><hr>'
        html = pattern % (html, filename)
        self.jinja['sections'].append(html)

        # The scatter plot. First the javascript in the header
        self._set_scatter()

        # and the section itself
        html = """
        <div class="wrap">
        <div class="content">
        <center>
        <canvas id='canvasVolcano' width='800' height='540'></canvas>
        </center>
        </div>
        <div class="clear">&nbsp;</div>
        </div>
        """
        self.jinja["sections"].append(html)

    def _set_scatter(self):
        """Store the javascript of the Rp versus bayes factor scatter plot
        in the ``volcano_jsdata`` jinja entry"""
        filename = self.caller.prefix_data + "results.csv"
        df = pd.read_csv(filename)
        df["markersize"] = 20
        js = ScatterJS(df, x="Rp", y="bayes", color="bayes",
                       size="markersize")
        js.xlabel = "Coefficient correlation (pearson)"
        js.ylabel = "Bayes factor"
        self.jinja['volcano_jsdata'] = js.get_html()