diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ac24acb..0000000 --- a/.travis.yml +++ /dev/null @@ -1,53 +0,0 @@ -language: c -sudo: true - -# deploy: -# provider: pypi -# user: bgruening -# skip_cleanup: true -# password: -# secure: AZVacwZdbN+vKXpZm/XjhRrlkJ9LBEn3Xzz7HkOo/yOk6M2rOHpjumsYzIB6YXnJGrR8ZdOJ3nDOwRaqq3LItXLcH05Pf6cajNwGzD3XLKFpqT+M+RYyki+hKjpEZtdKkkxas4FJQ1+x+yQkbYOjyWYz25xJOaAMOmyTmSZio8vK/0k/XJA+yg1uBHUc+xlP7MKygDItXDx0WVEKhbh+I/EqIi2D+SJFtgao/Y25qFzpplmXE5goe6eM4vAFrAzlahadfWggy2N+fx7gkMB5MOzxeUwKYbp7d2qftnSsBO+9P1Bh78g1nBw1MH3a+zqCGeMTmdbU1VdZUXuUsc5aiEL2lVcYOcB499rmssLuS5FALrtVxmEVMRVd/TQD8BOTmZIc7msuoviNt/HChEN6ldkNCHDh1paAnSMsbSmvKmFKr4rXkQNhvKa1D+78MtypPdxgt5cap+WMxGou4+WpzQ2OYfCrcFLWPwIyyBZ4tUr5xXX84AOmyD2OopbFq/1k1wkBvITWexncxxHdFQfCmzO5lDLruWKzr5FyrrBAgZERveqy5PHMp6v8teHfHTVnUMTuB2LZFltBdJL4U+bsOKy0WColpcppBo/kbvSQJY1YgMPuYBRrK1P1r0NkP83kywoAFwC0uMjfBvPcbYGQJHLLRYUYlDDmdao+sbMF6u8= -# on: -# tags: true -# distributions: "sdist bdist_wheel" -# python: -# - 3.6 -# - 3.7 - - -env: - - TRAVIS_PYTHON_VERSION=3.6 - - TRAVIS_PYTHON_VERSION=3.7 -os: -- linux -- osx - -stages: -- lint -- test - -before_install: - -- export HIC_TEST_DATA_DIR="`pwd`/hicmatrix/test/test_data/" -- echo $HIC_TEST_DATA_DIR -- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then curl -L https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o miniconda.sh ; fi -- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then curl -L https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o miniconda.sh ; fi -- bash miniconda.sh -b -p $HOME/miniconda -- PATH_WITHOUT_CONDA="$PATH" -- export PATH="$HOME/miniconda/bin:$PATH" -- hash -r -- conda config --set always_yes yes --set changeps1 no -- conda update -q conda -- conda info -a - -install: -- conda install --yes -c conda-forge -c bioconda python=$TRAVIS_PYTHON_VERSION --file - requirements.txt -- conda install --yes -c conda-forge -c bioconda pytest -- conda install --yes flake8 -- python setup.py install - -script: -- flake8 . --exclude=.venv,.build,planemo_test_env,build --ignore=E501,F401,F403,E402,F999,F405,E712 -- py.test hicmatrix/test/ --capture=sys - diff --git a/README.rst b/README.rst index 064b336..bfd4a14 100644 --- a/README.rst +++ b/README.rst @@ -1,11 +1,11 @@ HiCMatrix =========== -This library implements the central class of HiCExplorer to manage Hi-C interaction matrices. It is separated from the main project to enable to use of Hi-C matrices -in other project without the dependency to HiCExplorer. Moreover, it enables us to use the already separated pyGenomeTracks (former hicPlotTADs) to be used in HiCExplorer +This library implements the central class of HiCExplorer to manage Hi-C interaction matrices. It is separated from the main project to enable Hi-C matrices +in other projects without the dependency on HiCExplorer. Moreover, it enables us to use the already separated pyGenomeTracks (former hicPlotTADs) in HiCExplorer because mutual dependencies are resolved. -With version 8 we dropped the support for Python 2. +With version 8, we dropped the support for Python 2. Version 14 introduced the official support for scool file format, used by scHiCExplorer since version 5: https://github.com/joachimwolff/scHiCExplorer and https://schicexplorer.readthedocs.io/en/latest/. @@ -13,7 +13,7 @@ Read support ------------- - h5 -- cool +- cool / mcool / scool - hicpro - homer @@ -21,13 +21,14 @@ Write support -------------- - h5 -- cool +- cool / mcool - scool - homer - ginteractions +- hicpro Citation: ^^^^^^^^^ Joachim Wolff, Leily Rabbani, Ralf Gilsbach, Gautier Richard, Thomas Manke, Rolf Backofen, Björn A Grüning. -**Galaxy HiCExplorer 3: a web server for reproducible Hi-C, capture Hi-C and single-cell Hi-C data analysis, quality control and visualization, Nucleic Acids Research**, gkaa220, https://doi.org/10.1093/nar/gkaa220 +**Galaxy HiCExplorer 3: a web server for reproducible Hi-C, capture Hi-C and single-cell Hi-C data analysis, quality control and visualization, Nucleic Acids Research**, Volume 48, Issue W1, 02 July 2020, Pages W177–W184, https://doi.org/10.1093/nar/gkaa220 diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 0000000..bd00115 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,80 @@ +pr: + autoCancel: true + +jobs: + +- job: 'Linux' + timeoutInMinutes: 0 + pool: + vmImage: 'ubuntu-latest' + strategy: + matrix: + Python36: + python.version: '3.6' + Python37: + python.version: '3.7' + Python38: + python.version: '3.8' + + steps: + - bash: | + echo "##vso[task.prependpath]$CONDA/bin" + hash -r + displayName: Add conda to PATH + - bash: | + conda config --set always_yes yes --set changeps1 no + conda info -a + conda create -n hicmatrix --yes -c conda-forge -c bioconda python=$(python.version) --file requirements.txt + source activate hicmatrix + conda install --yes -c conda-forge -c bioconda pytest flake8 pytest-xdist pytest-forked + conda install --yes -c conda-forge -c bioconda nose + conda install --yes pathlib + conda install --yes -c defaults -c conda-forge -c bioconda configparser + python setup.py install + displayName: installing dependencies + - script: | + source activate hicmatrix + flake8 . --exclude=.venv,.build,planemo_test_env,build --ignore=E501,F401,F403,E402,F999,F405,E712 + displayName: linting + - script: | + source activate hicmatrix + py.test hicmatrix/test/ --capture=sys + displayName: pytest + +- job: 'OSX' + timeoutInMinutes: 0 + pool: + vmImage: 'macOS-10.14' + strategy: + matrix: + Python36: + python.version: '3.6' + Python37: + python.version: '3.7' + Python38: + python.version: '3.8' + + steps: + - bash: | + echo "##vso[task.prependpath]$CONDA/bin" + hash -r + displayName: Add conda to PATH + - bash: | + conda config --set always_yes yes --set changeps1 no + conda info -a + conda create -n hicmatrix --yes -c conda-forge -c bioconda python=$(python.version) --file requirements.txt + source activate hicmatrix + conda install --yes -c conda-forge -c bioconda pytest flake8 pytest-xdist pytest-forked + conda install --yes -c conda-forge -c bioconda nose + conda install --yes pathlib + conda install --yes -c defaults -c conda-forge -c bioconda configparser + python setup.py install + displayName: installing dependencies + - script: | + source activate hicmatrix + flake8 . --exclude=.venv,.build,planemo_test_env,build --ignore=E501,F401,F403,E402,F999,F405,E712 + displayName: linting + - script: | + source activate hicmatrix + py.test hicmatrix/test/ --capture=sys + displayName: pytest diff --git a/hicmatrix/HiCMatrix.py b/hicmatrix/HiCMatrix.py index 24aeca6..e070c8b 100644 --- a/hicmatrix/HiCMatrix.py +++ b/hicmatrix/HiCMatrix.py @@ -13,6 +13,7 @@ from intervaltree import IntervalTree, Interval import cooler import time +from collections import Counter from .utilities import toBytes from .utilities import toString @@ -169,7 +170,7 @@ def getBinSize(self): return self.bin_size # If there are more bins, the diff will be compared # to the median of the differences between starts - median = int(np.median(np.diff(start))) + median = int(np.median(np.concatenate([np.diff([start for chro, start, end, extra in self.cut_intervals if chro == cur_chrom]) for cur_chrom, nb in Counter(chrom).items() if nb > 1]))) # check if the bin size is # homogeneous @@ -334,7 +335,7 @@ def fit_cut_intervals(cut_intervals): return cut_intervals chrom, start, end, extra = zip(*cut_intervals) - median = int(np.median(np.diff(start))) + median = int(np.median(np.concatenate([np.diff([start for chro, start, end, extra in cut_intervals if chro == cur_chrom]) for cur_chrom, nb in Counter(chrom).items() if nb > 1]))) diff = np.array(end) - np.array(start) # check if the bin size is homogeneous if len(np.flatnonzero(diff != median)) > (len(diff) * 0.01): @@ -354,7 +355,7 @@ def snap_nearest_multiple(start_x, m): def convert_to_zscore_matrix(self, maxdepth=None, perchr=False): return self.convert_to_obs_exp_matrix(maxdepth=maxdepth, zscore=True, perchr=perchr) - def convert_to_obs_exp_matrix(self, maxdepth=None, zscore=False, perchr=False): + def convert_to_obs_exp_matrix(self, maxdepth=None, zscore=False, perchr=False, pSkipTriu=False): """ Converts a corrected counts matrix into a obs / expected matrix or z-scores fast. @@ -395,10 +396,12 @@ def convert_to_obs_exp_matrix(self, maxdepth=None, zscore=False, perchr=False): # max_depth_in_bis # (this is done by subtracting a second sparse matrix # that contains only the upper matrix that wants to be removed. - self.matrix = triu(self.matrix, k=0, format='csr') - \ - triu(self.matrix, k=max_depth_in_bins, format='csr') + if not pSkipTriu: + self.matrix = triu(self.matrix, k=0, format='csr') - \ + triu(self.matrix, k=max_depth_in_bins, format='csr') else: - self.matrix = triu(self.matrix, k=0, format='csr') + if not pSkipTriu: + self.matrix = triu(self.matrix, k=0, format='csr') self.matrix.eliminate_zeros() depth = None diff --git a/hicmatrix/__init__.py b/hicmatrix/__init__.py index 7d33780..7ee7855 100644 --- a/hicmatrix/__init__.py +++ b/hicmatrix/__init__.py @@ -1,3 +1,5 @@ import logging logging.basicConfig(level=logging.INFO) +# logging.basicConfig(level=logging.DEBUG) + logging.getLogger('cooler').setLevel(logging.WARNING) diff --git a/hicmatrix/_version.py b/hicmatrix/_version.py index f0b6a15..fc3100d 100644 --- a/hicmatrix/_version.py +++ b/hicmatrix/_version.py @@ -1,2 +1,2 @@ -__version__ = '15' +__version__ = '16' # Version number differs from HiCExplorer! diff --git a/hicmatrix/lib/cool.py b/hicmatrix/lib/cool.py index fb38cb5..d158f1f 100644 --- a/hicmatrix/lib/cool.py +++ b/hicmatrix/lib/cool.py @@ -53,11 +53,12 @@ def load(self): log.warning('No matrix is initialized') try: cooler_file = cooler.Cooler(self.matrixFileName) - if 'metadata' in cooler_file.info: - self.hic_metadata = cooler_file.info['metadata'] - else: - self.hic_metadata = None - self.cool_info = deepcopy(cooler_file.info) + # if 'metadata' in cooler_file.info: + self.hic_metadata = cooler_file.info + # else: + # self.hic_metadata = None + # self.cool_info = deepcopy(cooler_file.info) + # log.debug('self.hic_metadata {}'.format(self.hic_metadata)) except Exception as e: log.warning("Could not open cooler file. Maybe the path is wrong or the given node is not available.") log.warning('The following file was tried to open: {}'.format(self.matrixFileName)) @@ -256,10 +257,13 @@ def load(self): nan_bins = None distance_counts = None + # log.debug('self.hic_metadata {}'.format(self.hic_metadata)) return matrix, cut_intervals, nan_bins, distance_counts, correction_factors def create_cooler_input(self, pSymmetric=True, pApplyCorrection=True): + log.debug('self.hic_metadata 34{}'.format(self.hic_metadata)) + self.matrix.eliminate_zeros() if self.nan_bins is not None and len(self.nan_bins) > 0 and self.fileWasH5: @@ -296,6 +300,7 @@ def create_cooler_input(self, pSymmetric=True, pApplyCorrection=True): # instead of handling this before. bins_data_frame = pd.DataFrame(self.cut_intervals, columns=['chrom', 'start', 'end', 'interactions']).drop('interactions', axis=1) dtype_pixel = {'bin1_id': np.int32, 'bin2_id': np.int32, 'count': np.int32} + log.debug('foo') if self.correction_factors is not None and pApplyCorrection: dtype_pixel['weight'] = np.float32 @@ -313,6 +318,7 @@ def create_cooler_input(self, pSymmetric=True, pApplyCorrection=True): self.correctionOperator = '*' log.debug('inverted correction factors') weight = convertNansToOnes(np.array(self.correction_factors).flatten()) + log.debug('weight {}'.format(weight)) bins_data_frame = bins_data_frame.assign(weight=weight) log.debug("Reverting correction factors on matrix...") @@ -340,7 +346,7 @@ def create_cooler_input(self, pSymmetric=True, pApplyCorrection=True): dtype_pixel['weight'] = np.float32 weight = convertNansToOnes(np.array(self.correction_factors).flatten()) bins_data_frame = bins_data_frame.assign(weight=weight) - + log.debug('weight 2: {}'.format(weight)) instances, features = self.matrix.nonzero() matrix_data_frame = pd.DataFrame(instances, columns=['bin1_id'], dtype=np.int32) @@ -386,19 +392,13 @@ def create_cooler_input(self, pSymmetric=True, pApplyCorrection=True): info['tool-url'] = str('https://github.com/deeptools/HiCMatrix') - # info['nchroms'] = int(bins_data_frame['chrom'][:].nunique()) - # info['chromosomes'] = list(bins_data_frame['chrom'][:].unique()) - # info['nnz'] = np.string_(str(self.matrix.nnz * 2)) - # info['min-value'] = np.string_(str(matrix_data_frame['count'].min())) - # info['max-value'] = np.string_(str(matrix_data_frame['count'].max())) - # info['sum-elements'] = int(matrix_data_frame['count'].sum()) - if self.hic_metadata is not None and 'matrix-generated-by' in self.hic_metadata: info['matrix-generated-by'] = str(self.hic_metadata['matrix-generated-by']) del self.hic_metadata['matrix-generated-by'] if self.hic_metadata is not None and 'matrix-generated-by-url' in self.hic_metadata: info['matrix-generated-by-url'] = str(self.hic_metadata['matrix-generated-by-url']) del self.hic_metadata['matrix-generated-by-url'] + log.debug('self.hic_metadata {}'.format(self.hic_metadata)) if self.hic_metadata is not None and 'genome-assembly' in self.hic_metadata: info['genome-assembly'] = str(self.hic_metadata['genome-assembly']) del self.hic_metadata['genome-assembly'] @@ -406,7 +406,7 @@ def create_cooler_input(self, pSymmetric=True, pApplyCorrection=True): return bins_data_frame, matrix_data_frame, dtype_pixel, info def save(self, pFileName, pSymmetric=True, pApplyCorrection=True): - log.debug('Save in cool format') + log.debug('Save in cool format11112323') bins_data_frame, matrix_data_frame, dtype_pixel, info = self.create_cooler_input(pSymmetric=pSymmetric, pApplyCorrection=pApplyCorrection) local_temp_dir = os.path.dirname(os.path.realpath(pFileName)) @@ -416,9 +416,11 @@ def save(self, pFileName, pSymmetric=True, pApplyCorrection=True): mode=self.appendData, dtypes=dtype_pixel, ordered=True, - metadata=self.hic_metadata, + metadata=info, + temp_dir=local_temp_dir) + log.debug('info {}'.format(info)) if self.appendData == 'w': fileName = pFileName.split('::')[0] with h5py.File(fileName, 'r+') as h5file: diff --git a/hicmatrix/lib/hicpro.py b/hicmatrix/lib/hicpro.py index 463fb0e..496cb38 100644 --- a/hicmatrix/lib/hicpro.py +++ b/hicmatrix/lib/hicpro.py @@ -36,3 +36,16 @@ def load(self): distance_counts = None correction_factors = None return matrix, cut_intervals, nan_bins, distance_counts, correction_factors + + def save(self, pFilename, pSymmetric=None, pApplyCorrection=None): + self.matrix.eliminate_zeros() + instances, features = self.matrix.nonzero() + data = self.matrix.data + + with open(pFilename, 'w') as matrix_file: + for x, y, value in zip(instances, features, data): + matrix_file.write(str(int(x + 1)) + '\t' + str(int(y + 1)) + '\t' + str(value) + '\n') + + with open(self.bedFile, 'w') as bed_file: + for i, interval in enumerate(self.cut_intervals): + bed_file.write('\t'.join(map(str, interval[:3])) + '\t' + str(i + 1) + '\n') diff --git a/hicmatrix/lib/matrixFileHandler.py b/hicmatrix/lib/matrixFileHandler.py index 787f9e0..17214cc 100644 --- a/hicmatrix/lib/matrixFileHandler.py +++ b/hicmatrix/lib/matrixFileHandler.py @@ -18,9 +18,7 @@ def __init__(self, pFileType='cool', pMatrixFile=None, pChrnameList=None, if pFileType == 'hicpro': self.matrixFile = self.class_(pMatrixFile=pMatrixFile, pBedFile=pBedFileHicPro) else: - log.debug('23') self.matrixFile = self.class_(pMatrixFile=pMatrixFile) - log.debug('22 self.matrixFile.matrixFileName {}'.format(self.matrixFile.matrixFileName)) if pFileType == 'cool': self.matrixFile.chrnameList = pChrnameList if pCorrectionFactorTable is not None: @@ -28,23 +26,19 @@ def __init__(self, pFileType='cool', pMatrixFile=None, pChrnameList=None, if pCorrectionOperator is not None: self.matrixFile.correctionOperator = pCorrectionOperator if pEnforceInteger is not None: - log.debug('pEnforceInteger {}'.format(pEnforceInteger)) self.matrixFile.enforceInteger = pEnforceInteger if pAppend is not None: self.matrixFile.appendData = pAppend if pFileWasH5 is not None: self.matrixFile.fileWasH5 = pFileWasH5 - log.debug('pApplyCorrectionCoolerLoad {}'.format(pApplyCorrectionCoolerLoad)) if pApplyCorrectionCoolerLoad is not None: self.matrixFile.applyCorrectionLoad = pApplyCorrectionCoolerLoad if pHiCInfo is not None: - self.hic_metadata = pHiCInfo - log.debug('pHic2CoolVersion : {}'.format(pHic2CoolVersion)) + self.matrixFile.hic_metadata = pHiCInfo if pHic2CoolVersion is not None: self.matrixFile.hic2cool_version = pHic2CoolVersion if pDistance is not None: self.matrixFile.distance = pDistance - log.debug('self.distance {}'.format(self.matrixFile.distance)) if pMatrixFormat is not None: self.matrixFile.matrixFormat = pMatrixFormat if pLoadMatrixOnly is not None: