# Source code for genipe.tests.test_impute2_extractor
# This file is part of genipe.
#
# This work is licensed under the Creative Commons Attribution-NonCommercial
# 4.0 International License. To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to Creative
# Commons, PO Box 1866, Mountain View, CA 94042, USA.
import os
import logging
import unittest
from tempfile import TemporaryDirectory
from ..tools import impute2_extractor
# Module metadata: authorship, copyright and licensing information.
__author__ = "Louis-Philippe Lemieux Perreault"
__copyright__ = "Copyright 2014, Beaulieu-Saucier Pharmacogenomics Centre"
__license__ = "Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)"
# Names exported by ``from <module> import *``: only the test case class.
__all__ = ["TestImpute2Extractor"]
[docs]class TestImpute2Extractor(unittest.TestCase):
[docs] @staticmethod
def clean_logging_handlers():
handlers = list(logging.root.handlers)
for handler in handlers:
logging.root.removeHandler(handler)
def setUp(self):
    """Create the temporary directory and the input files of the tests.

    Six files sharing the ``genipe`` prefix are written to a fresh
    temporary directory (impute2, impute2_info, map, maf, completion_rates
    and sample), and the arguments common to all the tests are saved in
    ``self.common_args``.
    """
    # Every test works inside its own temporary directory
    self.output_dir = TemporaryDirectory(prefix="genipe_test_")
    prefix = os.path.join(self.output_dir.name, "genipe")

    # The impute2 file (seven markers)
    impute2_content = (
        "1 rs12345 1231415 A G 1 0 0 0.988 0.002 0 0 0.997 0.003\n"
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457 3214570 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_2 3214572 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:4214570_1 4214570 T TC 0.869 0.130 0 0.869 0.130 0 0.869 "
        "0.130 0\n"
    )

    # The impute2_info file (one header line, then one line per marker)
    impute2_info_content = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs12345\t1231415\tA\tG\t0.006\t0.359\t0.987\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457\t3214570\tT\tTC\t0.126\t0.299\t0.832\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\trs23457_2\t3214572\tT\tTC\t0.084\t0.203\t0.854\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:4214570_1\t4214570\tT\tTC\t0.174\t0.589\t0.831\t0\t-1\t"
        "-1\t-1\n"
    )

    # The map file
    map_content = (
        "1\trs12345\t0\t1231415\n"
        "1\trs23456\t0\t3214569\n"
        "1\trs23457\t0\t3214570\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\trs23457_2\t0\t3214572\n"
        "1\t1:3214573\t0\t3214573\n"
        "1\t1:4214570_1\t0\t4214570\n"
    )

    # The maf file (the last marker has a missing value)
    maf_values = (1/6, 0.5, 1/4, 1/4, 1/4, 1/4, "NA")
    maf_content = (
        "name\tmajor\tminor\tmaf\n"
        "rs12345\tA\tG\t{}\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457\tTC\tT\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "rs23457_2\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
        "1:4214570_1\tT\tTC\t{}\n"
    ).format(*maf_values)

    # The completion rate file
    completion_rates = (1, 2/3, 2/3, 2/3, 2/3, 2/3, 0)
    completion_rates_content = (
        "name\tnb_missing\tcompletion_rate\n"
        "rs12345\t0\t{}\n"
        "rs23456\t1\t{}\n"
        "rs23457\t1\t{}\n"
        "rs23457_1\t1\t{}\n"
        "rs23457_2\t1\t{}\n"
        "1:3214573\t1\t{}\n"
        "1:4214570_1\t3\t{}\n"
    ).format(*completion_rates)

    # The sample file (samples f1/s1, f2/s2 and f3/s3)
    sample_content = (
        "ID_1 ID_2 missing father mother sex plink_pheno\n"
        "0 0 0 D D D B\n"
        "f1 s1 0 0 0 0 -9\n"
        "f2 s2 0 0 0 0 -9\n"
        "f3 s3 0 0 0 0 -9\n"
    )

    # Writing all the input files
    input_files = (
        (".impute2", impute2_content),
        (".impute2_info", impute2_info_content),
        (".map", map_content),
        (".maf", maf_content),
        (".completion_rates", completion_rates_content),
        (".sample", sample_content),
    )
    for extension, content in input_files:
        with open(prefix + extension, "w") as input_file:
            input_file.write(content)

    # The arguments shared by every test
    out_prefix = os.path.join(self.output_dir.name, "results")
    self.common_args = [
        "--impute2", prefix + ".impute2",
        "--format", "impute2", "dosage", "calls",
        "--out", out_prefix,
    ]
def tearDown(self):
    """Clean up after a test by removing its temporary directory."""
    # The directory was created in setUp; cleanup deletes it with its content
    temporary_directory = self.output_dir
    temporary_directory.cleanup()
def test_extract(self):
    """Tests the extraction by marker name.

    A file listing three markers is passed to ``--extract``. The three
    requested output files (impute2, dosage and calls) and the three
    companion files (impute2_info, map and maf) are expected to exist and
    to contain only those three markers.
    """
    # Creating a file with markers to extract
    extract_filename = os.path.join(self.output_dir.name, "to_extract")
    with open(extract_filename, "w") as o_file:
        o_file.write("rs23456\nrs23457_2\n1:4214570_1\n")

    # Executing the script
    args = self.common_args + [
        "--extract", extract_filename,
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457_2 3214572 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:4214570_1 4214570 T TC 0.869 0.130 0 0.869 0.130 0 0.869 "
        "0.130 0\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214572\trs23457_2\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t4214570\t1:4214570_1\tTC\tT\tnan\tnan\tnan\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457_2\t0\t3214572\t0 0\tT TC\tTC TC\n"
        "1\t1:4214570_1\t0\t4214570\t0 0\t0 0\t0 0\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t-1\n"
        "1\trs23457_2\t3214572\tT\tTC\t0.084\t0.203\t0.854\t0\t-1\t-1\t"
        "-1\n"
        "1\t1:4214570_1\t4214570\tT\tTC\t0.174\t0.589\t0.831\t0\t-1\t-1\t"
        "-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs23456\t0\t3214569\n"
        "1\trs23457_2\t0\t3214572\n"
        "1\t1:4214570_1\t0\t4214570\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457_2\tTC\tT\t{}\n"
        "1:4214570_1\tT\tTC\t{}\n"
    ).format(0.5, 1/4, "NA")

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_genomic(self):
    """Tests the extraction by genomic location.

    The region ``chr1:3214570-3214573`` is requested. The three output
    files (impute2, dosage and calls) and the three companion files
    (impute2_info, map and maf) are expected to exist and to contain the
    four markers located at positions 3214570 to 3214573.
    """
    # Executing the script
    args = self.common_args + [
        "--genomic", "chr1:3214570-3214573",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs23457 3214570 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_2 3214572 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214570\trs23457\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214572\trs23457_2\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23457\t0\t3214570\t0 0\tT TC\tTC TC\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\trs23457_2\t0\t3214572\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23457\t3214570\tT\tTC\t0.126\t0.299\t0.832\t0\t-1\t-1\t-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_2\t3214572\tT\tTC\t0.084\t0.203\t0.854\t0\t-1\t-1\t"
        "-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t-1\t"
        "-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs23457\t0\t3214570\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\trs23457_2\t0\t3214572\n"
        "1\t1:3214573\t0\t3214573\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs23457\tTC\tT\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "rs23457_2\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
    ).format(1/4, 1/4, 1/4, 1/4)

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_maf(self):
    """Tests the extraction by maf.

    The script is run with ``--maf 0.25``. The three output files
    (impute2, dosage and calls) and the three companion files
    (impute2_info, map and maf) are expected to exist and to contain the
    five markers with a maf of 0.25 or 0.5 (the marker with a maf of 1/6
    and the one with a missing maf are absent from the expected output).
    """
    # Executing the script
    args = self.common_args + [
        "--maf", "0.25",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457 3214570 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_2 3214572 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214570\trs23457\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214572\trs23457_2\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457\t0\t3214570\t0 0\tT TC\tTC TC\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\trs23457_2\t0\t3214572\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457\t3214570\tT\tTC\t0.126\t0.299\t0.832\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\trs23457_2\t3214572\tT\tTC\t0.084\t0.203\t0.854\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs23456\t0\t3214569\n"
        "1\trs23457\t0\t3214570\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\trs23457_2\t0\t3214572\n"
        "1\t1:3214573\t0\t3214573\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457\tTC\tT\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "rs23457_2\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
    ).format(0.5, 1/4, 1/4, 1/4, 1/4)

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_rate(self):
    """Tests the extraction by completion rate.

    The script is run with ``--rate 0.5``. The three output files
    (impute2, dosage and calls) and the three companion files
    (impute2_info, map and maf) are expected to exist and to contain the
    six markers with a completion rate of 2/3 or 1 (the marker with a
    completion rate of 0 is absent from the expected output).
    """
    # Executing the script
    args = self.common_args + [
        "--rate", "0.5",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs12345 1231415 A G 1 0 0 0.988 0.002 0 0 0.997 0.003\n"
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457 3214570 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_2 3214572 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t1231415\trs12345\tG\tA\t0.0\t0.002\t1.003\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214570\trs23457\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214572\trs23457_2\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs12345\t0\t1231415\tA A\tA A\tA G\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457\t0\t3214570\t0 0\tT TC\tTC TC\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\trs23457_2\t0\t3214572\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs12345\t1231415\tA\tG\t0.006\t0.359\t0.987\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457\t3214570\tT\tTC\t0.126\t0.299\t0.832\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\trs23457_2\t3214572\tT\tTC\t0.084\t0.203\t0.854\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs12345\t0\t1231415\n"
        "1\trs23456\t0\t3214569\n"
        "1\trs23457\t0\t3214570\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\trs23457_2\t0\t3214572\n"
        "1\t1:3214573\t0\t3214573\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs12345\tA\tG\t{}\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457\tTC\tT\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "rs23457_2\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
    ).format(1/6, 0.5, 1/4, 1/4, 1/4, 1/4)

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_info(self):
    """Tests the extraction by information value.

    The script is run with ``--info 0.3``. The three output files
    (impute2, dosage and calls) and the three companion files
    (impute2_info, map and maf) are expected to exist and to contain the
    five markers with an information value of 0.300 or more (the markers
    with a value of 0.299 and 0.203 are absent from the expected output).
    """
    # Executing the script
    args = self.common_args + [
        "--info", "0.3",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs12345 1231415 A G 1 0 0 0.988 0.002 0 0 0.997 0.003\n"
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:4214570_1 4214570 T TC 0.869 0.130 0 0.869 0.130 0 0.869 "
        "0.130 0\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t1231415\trs12345\tG\tA\t0.0\t0.002\t1.003\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t4214570\t1:4214570_1\tTC\tT\tnan\tnan\tnan\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs12345\t0\t1231415\tA A\tA A\tA G\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
        "1\t1:4214570_1\t0\t4214570\t0 0\t0 0\t0 0\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs12345\t1231415\tA\tG\t0.006\t0.359\t0.987\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:4214570_1\t4214570\tT\tTC\t0.174\t0.589\t0.831\t0\t-1\t"
        "-1\t-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs12345\t0\t1231415\n"
        "1\trs23456\t0\t3214569\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\t1:3214573\t0\t3214573\n"
        "1\t1:4214570_1\t0\t4214570\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs12345\tA\tG\t{}\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
        "1:4214570_1\tT\tTC\t{}\n"
    ).format(1/6, 0.5, 1/4, 1/4, "NA")

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_genomic_maf(self):
    """Tests the extraction by genomic location and maf.

    The script is run with ``--genomic chr1:3214569-3214573`` and
    ``--maf 0.3``. The three output files (impute2, dosage and calls) and
    the three companion files (impute2_info, map and maf) are expected to
    exist and to contain a single marker (rs23456).
    """
    # Executing the script
    args = self.common_args + [
        "--genomic", "chr1:3214569-3214573",
        "--maf", "0.3",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs23456\t0\t3214569\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
    ).format(0.5)

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_genomic_rate(self):
    """Tests the extraction by genomic location and completion rate.

    The script is run with ``--genomic chr1:1231415-3214573`` and
    ``--rate 0.7``. The three output files (impute2, dosage and calls) and
    the three companion files (impute2_info, map and maf) are expected to
    exist and to contain a single marker (rs12345).
    """
    # Executing the script
    args = self.common_args + [
        "--genomic", "chr1:1231415-3214573",
        "--rate", "0.7",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs12345 1231415 A G 1 0 0 0.988 0.002 0 0 0.997 0.003\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t1231415\trs12345\tG\tA\t0.0\t0.002\t1.003\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs12345\t0\t1231415\tA A\tA A\tA G\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs12345\t1231415\tA\tG\t0.006\t0.359\t0.987\t0\t-1\t-1\t"
        "-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs12345\t0\t1231415\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs12345\tA\tG\t{}\n"
    ).format(1/6)

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_genomic_info(self):
    """Tests the extraction by genomic location and information value.

    The script is run with ``--genomic chr1:1231415-3214573`` and
    ``--info 0.28``. The three output files (impute2, dosage and calls)
    and the three companion files (impute2_info, map and maf) are expected
    to exist and to contain five markers (rs23457_2, with an information
    value of 0.203, and 1:4214570_1, at position 4214570, are absent from
    the expected output).
    """
    # Executing the script
    args = self.common_args + [
        "--genomic", "chr1:1231415-3214573",
        "--info", "0.28",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs12345 1231415 A G 1 0 0 0.988 0.002 0 0 0.997 0.003\n"
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457 3214570 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t1231415\trs12345\tG\tA\t0.0\t0.002\t1.003\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214570\trs23457\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs12345\t0\t1231415\tA A\tA A\tA G\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457\t0\t3214570\t0 0\tT TC\tTC TC\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs12345\t1231415\tA\tG\t0.006\t0.359\t0.987\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457\t3214570\tT\tTC\t0.126\t0.299\t0.832\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs12345\t0\t1231415\n"
        "1\trs23456\t0\t3214569\n"
        "1\trs23457\t0\t3214570\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\t1:3214573\t0\t3214573\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs12345\tA\tG\t{}\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457\tTC\tT\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
    ).format(1/6, 0.5, 1/4, 1/4, 1/4)

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_maf_rate(self):
    """Tests the extraction by maf and completion rate.

    The script is run with ``--maf 0.2`` and ``--rate 0.6``. The three
    output files (impute2, dosage and calls) and the three companion files
    (impute2_info, map and maf) are expected to exist and to contain five
    markers (rs12345, with a maf of 1/6, and 1:4214570_1, with a missing
    maf and a completion rate of 0, are absent from the expected output).
    """
    # Executing the script
    args = self.common_args + [
        "--maf", "0.2",
        "--rate", "0.6",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # The expected impute2 file
    expected_impute2 = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457 3214570 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_2 3214572 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
    )

    # The expected dosage file
    expected_dosage = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214570\trs23457\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214572\trs23457_2\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
    )

    # The expected hard calls file
    expected_calls = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457\t0\t3214570\t0 0\tT TC\tTC TC\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\trs23457_2\t0\t3214572\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
    )

    # The expected companion file (impute2_info)
    expected_info = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457\t3214570\tT\tTC\t0.126\t0.299\t0.832\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\trs23457_2\t3214572\tT\tTC\t0.084\t0.203\t0.854\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
    )

    # The expected companion file (map)
    expected_map = (
        "1\trs23456\t0\t3214569\n"
        "1\trs23457\t0\t3214570\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\trs23457_2\t0\t3214572\n"
        "1\t1:3214573\t0\t3214573\n"
    )

    # The expected companion file (maf)
    expected_maf = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457\tTC\tT\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "rs23457_2\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
    ).format(0.5, 1/4, 1/4, 1/4, 1/4)

    expected_content = (
        ("impute2", expected_impute2),
        ("dosage", expected_dosage),
        ("calls", expected_calls),
        ("impute2_info", expected_info),
        ("map", expected_map),
        ("maf", expected_maf),
    )

    # Testing we have all the output files (each check targets the file
    # that is compared afterwards, companion files included)
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for ext, _ in expected_content:
        self.assertTrue(os.path.isfile(template_name.format(ext=ext)))

    # Checking the content of each file
    for ext, expected in expected_content:
        with open(template_name.format(ext=ext), "r") as i_file:
            observed = i_file.read()
        self.assertEqual(expected, observed)
def test_maf_info(self):
    """Tests the extraction by maf and information value.

    Runs the extractor with ``--maf 0.2`` and ``--info 0.3`` and compares
    the content of every output file (impute2, dosage, calls) and of every
    companion file (impute2_info, map, maf) with the expected content.

    """
    # Executing the script
    args = self.common_args + [
        "--maf", "0.2",
        "--info", "0.3",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # Testing we have the three output files
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for suffix in ("impute2", "dosage", "calls"):
        self.assertTrue(os.path.isfile(template_name.format(ext=suffix)))

    # Checking the impute2 file
    expected = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
    )
    with open(template_name.format(ext="impute2"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the dosage file
    expected = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
    )
    with open(template_name.format(ext="dosage"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the hard calls file
    expected = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
    )
    with open(template_name.format(ext="calls"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (impute2_info)
    info_fn = template_name.format(ext="impute2_info")
    self.assertTrue(os.path.isfile(info_fn))
    expected = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
    )
    with open(info_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (map)
    map_fn = template_name.format(ext="map")
    # The existence check must target the map file itself (not info_fn)
    self.assertTrue(os.path.isfile(map_fn))
    expected = (
        "1\trs23456\t0\t3214569\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\t1:3214573\t0\t3214573\n"
    )
    with open(map_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (maf)
    maf_fn = template_name.format(ext="maf")
    # The existence check must target the maf file itself (not info_fn)
    self.assertTrue(os.path.isfile(maf_fn))
    expected = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
    ).format(0.5, 1/4, 1/4)
    with open(maf_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)
def test_rate_info(self):
    """Tests the extraction by completion rate and information value.

    Runs the extractor with ``--info 0.35`` and ``--rate 0.6`` and compares
    the content of every output file (impute2, dosage, calls) and of every
    companion file (impute2_info, map, maf) with the expected content.

    """
    # Executing the script
    args = self.common_args + [
        "--info", "0.35",
        "--rate", "0.6",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # Testing we have the three output files
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for suffix in ("impute2", "dosage", "calls"):
        self.assertTrue(os.path.isfile(template_name.format(ext=suffix)))

    # Checking the impute2 file
    expected = (
        "1 rs12345 1231415 A G 1 0 0 0.988 0.002 0 0 0.997 0.003\n"
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
    )
    with open(template_name.format(ext="impute2"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the dosage file
    expected = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t1231415\trs12345\tG\tA\t0.0\t0.002\t1.003\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
    )
    with open(template_name.format(ext="dosage"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the hard calls file
    expected = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs12345\t0\t1231415\tA A\tA A\tA G\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
    )
    with open(template_name.format(ext="calls"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (impute2_info)
    info_fn = template_name.format(ext="impute2_info")
    self.assertTrue(os.path.isfile(info_fn))
    expected = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs12345\t1231415\tA\tG\t0.006\t0.359\t0.987\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
    )
    with open(info_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (map)
    map_fn = template_name.format(ext="map")
    # The existence check must target the map file itself (not info_fn)
    self.assertTrue(os.path.isfile(map_fn))
    expected = (
        "1\trs12345\t0\t1231415\n"
        "1\trs23456\t0\t3214569\n"
    )
    with open(map_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (maf)
    maf_fn = template_name.format(ext="maf")
    # The existence check must target the maf file itself (not info_fn)
    self.assertTrue(os.path.isfile(maf_fn))
    expected = (
        "name\tmajor\tminor\tmaf\n"
        "rs12345\tA\tG\t{}\n"
        "rs23456\tT\tC\t{}\n"
    ).format(1/6, 0.5)
    with open(maf_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)
def test_genomic_maf_rate(self):
    """Tests the extraction by genomic location, maf and rate.

    Runs the extractor with ``--genomic chr1:3214568-4514570``,
    ``--maf 0.01`` and ``--rate 0.6`` and compares the content of every
    output file (impute2, dosage, calls) and of every companion file
    (impute2_info, map, maf) with the expected content.

    """
    # Executing the script
    args = self.common_args + [
        "--genomic", "chr1:3214568-4514570",
        "--maf", "0.01",
        "--rate", "0.6",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # Testing we have the three output files
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for suffix in ("impute2", "dosage", "calls"):
        self.assertTrue(os.path.isfile(template_name.format(ext=suffix)))

    # Checking the impute2 file
    expected = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
        "1 rs23457 3214570 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_1 3214571 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 rs23457_2 3214572 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
        "1 1:3214573 3214573 T TC 0.869 0.130 0 0 1 0 0 0 1\n"
    )
    with open(template_name.format(ext="impute2"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the dosage file
    expected = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
        "1\t3214570\trs23457\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214571\trs23457_1\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214572\trs23457_2\tT\tTC\tnan\t1.0\t0.0\n"
        "1\t3214573\t1:3214573\tT\tTC\tnan\t1.0\t0.0\n"
    )
    with open(template_name.format(ext="dosage"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the hard calls file
    expected = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
        "1\trs23457\t0\t3214570\t0 0\tT TC\tTC TC\n"
        "1\trs23457_1\t0\t3214571\t0 0\tT TC\tTC TC\n"
        "1\trs23457_2\t0\t3214572\t0 0\tT TC\tTC TC\n"
        "1\t1:3214573\t0\t3214573\t0 0\tT TC\tTC TC\n"
    )
    with open(template_name.format(ext="calls"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (impute2_info)
    info_fn = template_name.format(ext="impute2_info")
    self.assertTrue(os.path.isfile(info_fn))
    expected = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457\t3214570\tT\tTC\t0.126\t0.299\t0.832\t0\t-1\t-1\t"
        "-1\n"
        "1\trs23457_1\t3214571\tT\tTC\t0.060\t0.300\t0.909\t0\t-1\t"
        "-1\t-1\n"
        "1\trs23457_2\t3214572\tT\tTC\t0.084\t0.203\t0.854\t0\t-1\t"
        "-1\t-1\n"
        "1\t1:3214573\t3214573\tT\tTC\t0.371\t0.339\t0.619\t0\t-1\t"
        "-1\t-1\n"
    )
    with open(info_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (map)
    map_fn = template_name.format(ext="map")
    # The existence check must target the map file itself (not info_fn)
    self.assertTrue(os.path.isfile(map_fn))
    expected = (
        "1\trs23456\t0\t3214569\n"
        "1\trs23457\t0\t3214570\n"
        "1\trs23457_1\t0\t3214571\n"
        "1\trs23457_2\t0\t3214572\n"
        "1\t1:3214573\t0\t3214573\n"
    )
    with open(map_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (maf)
    maf_fn = template_name.format(ext="maf")
    # The existence check must target the maf file itself (not info_fn)
    self.assertTrue(os.path.isfile(maf_fn))
    expected = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
        "rs23457\tTC\tT\t{}\n"
        "rs23457_1\tTC\tT\t{}\n"
        "rs23457_2\tTC\tT\t{}\n"
        "1:3214573\tTC\tT\t{}\n"
    ).format(0.5, 1/4, 1/4, 1/4, 1/4)
    with open(maf_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)
def test_genomic_maf_info(self):
    """Tests the extraction by genomic location, maf and information.

    Runs the extractor with ``--genomic chr1:3214568-4514570``,
    ``--maf 0.01`` and ``--info 0.35`` and compares the content of every
    output file (impute2, dosage, calls) and of every companion file
    (impute2_info, map, maf) with the expected content.

    """
    # Executing the script
    args = self.common_args + [
        "--genomic", "chr1:3214568-4514570",
        "--maf", "0.01",
        "--info", "0.35",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # Testing we have the three output files
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for suffix in ("impute2", "dosage", "calls"):
        self.assertTrue(os.path.isfile(template_name.format(ext=suffix)))

    # Checking the impute2 file
    expected = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
    )
    with open(template_name.format(ext="impute2"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the dosage file
    expected = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
    )
    with open(template_name.format(ext="dosage"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the hard calls file
    expected = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
    )
    with open(template_name.format(ext="calls"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (impute2_info)
    info_fn = template_name.format(ext="impute2_info")
    self.assertTrue(os.path.isfile(info_fn))
    expected = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
    )
    with open(info_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (map)
    map_fn = template_name.format(ext="map")
    # The existence check must target the map file itself (not info_fn)
    self.assertTrue(os.path.isfile(map_fn))
    expected = (
        "1\trs23456\t0\t3214569\n"
    )
    with open(map_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (maf)
    maf_fn = template_name.format(ext="maf")
    # The existence check must target the maf file itself (not info_fn)
    self.assertTrue(os.path.isfile(maf_fn))
    expected = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
    ).format(0.5)
    with open(maf_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)
def test_maf_rate_info(self):
    """Tests the extraction by maf, completion rate and information.

    Runs the extractor with ``--rate 0.7``, ``--maf 0.01`` and
    ``--info 0.35`` and compares the content of every output file
    (impute2, dosage, calls) and of every companion file (impute2_info,
    map, maf) with the expected content.

    """
    # Executing the script
    args = self.common_args + [
        "--rate", "0.7",
        "--maf", "0.01",
        "--info", "0.35",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # Testing we have the three output files
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for suffix in ("impute2", "dosage", "calls"):
        self.assertTrue(os.path.isfile(template_name.format(ext=suffix)))

    # Checking the impute2 file
    expected = (
        "1 rs12345 1231415 A G 1 0 0 0.988 0.002 0 0 0.997 0.003\n"
    )
    with open(template_name.format(ext="impute2"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the dosage file
    expected = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t1231415\trs12345\tG\tA\t0.0\t0.002\t1.003\n"
    )
    with open(template_name.format(ext="dosage"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the hard calls file
    expected = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs12345\t0\t1231415\tA A\tA A\tA G\n"
    )
    with open(template_name.format(ext="calls"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (impute2_info)
    info_fn = template_name.format(ext="impute2_info")
    self.assertTrue(os.path.isfile(info_fn))
    expected = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs12345\t1231415\tA\tG\t0.006\t0.359\t0.987\t0\t-1\t-1\t"
        "-1\n"
    )
    with open(info_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (map)
    map_fn = template_name.format(ext="map")
    # The existence check must target the map file itself (not info_fn)
    self.assertTrue(os.path.isfile(map_fn))
    expected = (
        "1\trs12345\t0\t1231415\n"
    )
    with open(map_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (maf)
    maf_fn = template_name.format(ext="maf")
    # The existence check must target the maf file itself (not info_fn)
    self.assertTrue(os.path.isfile(maf_fn))
    expected = (
        "name\tmajor\tminor\tmaf\n"
        "rs12345\tA\tG\t{}\n"
    ).format(1/6)
    with open(maf_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)
def test_genomic_maf_rate_info(self):
    """Tests the extraction by genomic location, maf, rate and info.

    Runs the extractor with ``--genomic chr1:1231415-3214572``,
    ``--rate 0.6``, ``--maf 0.2`` and ``--info 0.35`` and compares the
    content of every output file (impute2, dosage, calls) and of every
    companion file (impute2_info, map, maf) with the expected content.

    """
    # Executing the script
    args = self.common_args + [
        "--genomic", "chr1:1231415-3214572",
        "--rate", "0.6",
        "--maf", "0.2",
        "--info", "0.35",
    ]
    impute2_extractor.main(args=args)
    TestImpute2Extractor.clean_logging_handlers()

    # Testing we have the three output files
    template_name = os.path.join(self.output_dir.name, "results.{ext}")
    for suffix in ("impute2", "dosage", "calls"):
        self.assertTrue(os.path.isfile(template_name.format(ext=suffix)))

    # Checking the impute2 file
    expected = (
        "1 rs23456 3214569 T C 0.869 0.130 0 0.903 0.095 0.002 0 0 1\n"
    )
    with open(template_name.format(ext="impute2"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the dosage file
    expected = (
        "chrom\tpos\tname\tminor\tmajor\tf1/s1\tf2/s2\tf3/s3\n"
        "1\t3214569\trs23456\tC\tT\tnan\t0.099\t2.0\n"
    )
    with open(template_name.format(ext="dosage"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checking the hard calls file
    expected = (
        "chrom\tname\tcm\tpos\tf1/s1\tf2/s2\tf3/s3\n"
        "1\trs23456\t0\t3214569\t0 0\tT T\tC C\n"
    )
    with open(template_name.format(ext="calls"), "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (impute2_info)
    info_fn = template_name.format(ext="impute2_info")
    self.assertTrue(os.path.isfile(info_fn))
    expected = (
        "chr\tname\tposition\ta0\ta1\texp_freq_a1\tinfo\tcertainty\t"
        "type\tinfo_type0\tconcord_type0\tr2_type0\n"
        "1\trs23456\t3214569\tT\tC\t0.082\t0.362\t0.866\t0\t-1\t-1\t"
        "-1\n"
    )
    with open(info_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (map)
    map_fn = template_name.format(ext="map")
    # The existence check must target the map file itself (not info_fn)
    self.assertTrue(os.path.isfile(map_fn))
    expected = (
        "1\trs23456\t0\t3214569\n"
    )
    with open(map_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)

    # Checks companion files (maf)
    maf_fn = template_name.format(ext="maf")
    # The existence check must target the maf file itself (not info_fn)
    self.assertTrue(os.path.isfile(maf_fn))
    expected = (
        "name\tmajor\tminor\tmaf\n"
        "rs23456\tT\tC\t{}\n"
    ).format(0.5)
    with open(maf_fn, "r") as i_file:
        observed = i_file.read()
    self.assertEqual(expected, observed)