Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dummy ignore #1873

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion htslib/kstring.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* The MIT License

Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
Copyright (C) 2013-2014, 2016, 2018-2020, 2022, 2024 Genome Research Ltd.
Copyright (C) 2013-2014, 2016, 2018-2020, 2022, 2024-2025 Genome Research Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
Expand Down Expand Up @@ -449,6 +449,57 @@ static inline int *ksplit(kstring_t *s, int delimiter, int *n)
return offsets;
}

/**
 *  kinsert_char - insert a single character into a kstring
 *  @param c    - character to insert
 *  @param pos  - zero-based offset at which the character is placed
 *  @param s    - kstring to modify
 *  Returns 0 on success; EOF if s is NULL, if pos is greater than the
 *  current length, or if ks_resize() reports a failure.
 *  A pos of 0 prepends, a pos equal to the current length appends.
 */
static inline int kinsert_char(char c, size_t pos, kstring_t *s)
{
    size_t tail;

    if (s == NULL || pos > s->l)
        return EOF;

    /* Ask for room for one more character plus the terminating NUL. */
    if (ks_resize(s, s->l + 2) < 0)
        return EOF;

    /* Shift everything from pos onwards one place to the right. */
    tail = s->l - pos;
    memmove(&s->s[pos + 1], &s->s[pos], tail);

    s->s[pos] = c;
    s->l += 1;
    s->s[s->l] = 0;
    return 0;
}

/**
 *  kinsert_str - insert a NUL-terminated string into a kstring
 *  @param str  - string to insert
 *  @param pos  - zero-based offset at which the string is placed
 *  @param s    - kstring to modify
 *  Returns 0 on success; EOF if s or str is NULL, if pos is greater than
 *  the current length, or if ks_resize() reports a failure.
 *  A pos of 0 prepends, a pos equal to the current length appends.
 *  Inserting an empty string at a valid pos succeeds and changes nothing.
 */
static inline int kinsert_str(const char *str, size_t pos, kstring_t *s)
{
    size_t len, tail;

    if (s == NULL || str == NULL || pos > s->l)
        return EOF;

    len = strlen(str);
    if (len == 0)
        return 0;               /* nothing to insert */

    /* Ask for room for the inserted text plus the terminating NUL. */
    if (ks_resize(s, s->l + len + 1) < 0)
        return EOF;

    /* Open a gap of len bytes at pos, then fill it. */
    tail = s->l - pos;
    memmove(&s->s[pos + len], &s->s[pos], tail);
    memcpy(&s->s[pos], str, len);

    s->l += len;
    s->s[s->l] = '\0';
    return 0;
}

#ifdef HTSLIB_SSIZE_T
#undef HTSLIB_SSIZE_T
#undef ssize_t
Expand Down
42 changes: 18 additions & 24 deletions htslib/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
/// High-level VCF/BCF variant calling file operations.
/*
Copyright (C) 2012, 2013 Broad Institute.
Copyright (C) 2012-2020, 2022-2023 Genome Research Ltd.
Copyright (C) 2012-2020, 2022-2025 Genome Research Ltd.

Author: Heng Li <lh3@sanger.ac.uk>

Expand Down Expand Up @@ -1501,31 +1501,25 @@ static inline int bcf_float_is_vector_end(float f)
return u.i==bcf_float_vector_end ? 1 : 0;
}


/**
* bcf_format_gt_v2 - formats GT information on a string
* @param hdr - bcf header, to get version
* @param fmt - pointer to bcf format data
* @param isample - index of the sample of interest in data
* @param str - pointer to output string
* Returns 0 on success and -1 on failure
* This function is preferred over bcf_format_gt as it supports VCF 4.4 and
* prefixed phasing. Explicit (prefixed) phasing for the first allele is
* emitted only when it is required to express the phasing correctly.
*/
HTSLIB_EXPORT
int bcf_format_gt_v2(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample,
kstring_t *str) HTS_RESULT_USED;

/*
 * bcf_format_gt - append the GT values of one sample to a kstring.
 * fmt     - format data; fmt->p, fmt->size, fmt->n and fmt->type are read
 * isample - sample index, used as a multiplier of fmt->size to locate the data
 * str     - output string, written with kputc()/kputw()
 * Returns 0 if every write succeeded, -1 if any write failed, and -2 (after
 * logging an error) when fmt->type is not one of the handled types.
 */
static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
{
/* Non-zero once any kputc()/kputw() call has reported a failure. */
uint32_t e = 0;
/*
 * BRANCH walks up to fmt->n values of type_t for the selected sample and
 * stops early at vector_end. For every value after the first, the low bit
 * picks the separator ('/' for 0, '|' for 1). A value whose upper bits
 * (val>>1) are zero is written as '.', otherwise (val>>1) - 1 is written
 * as a number. If no value was written at all, a single '.' is emitted.
 * The `missing` macro parameter is not referenced in the expansion.
 */
#define BRANCH(type_t, convert, missing, vector_end) { \
uint8_t *ptr = fmt->p + isample*fmt->size; \
int i; \
for (i=0; i<fmt->n; i++, ptr += sizeof(type_t)) \
{ \
type_t val = convert(ptr); \
if ( val == vector_end ) break; \
if ( i ) e |= kputc("/|"[val&1], str) < 0; \
if ( !(val>>1) ) e |= kputc('.', str) < 0; \
else e |= kputw((val>>1) - 1, str) < 0; \
} \
if (i == 0) e |= kputc('.', str) < 0; \
}
/* Dispatch on the stored integer width; BCF_BT_NULL yields a lone '.'. */
switch (fmt->type) {
case BCF_BT_INT8: BRANCH(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break;
case BCF_BT_INT16: BRANCH(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break;
case BCF_BT_INT32: BRANCH(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break;
case BCF_BT_NULL: e |= kputc('.', str) < 0; break;
default: hts_log_error("Unexpected type %d", fmt->type); return -2;
}
#undef BRANCH
return e == 0 ? 0 : -1;
/*
 * NOTE(review): as written, the statement below is unreachable because of
 * the return directly above it. This text looks like a rendered diff in
 * which the lines above are the removed implementation and the line below
 * is the added replacement (delegation to bcf_format_gt_v2 with a NULL
 * header) -- confirm against the actual header before relying on either.
 */
return bcf_format_gt_v2(NULL, fmt, isample, str);
}

static inline int bcf_enc_size(kstring_t *s, int size, int type)
Expand Down
10 changes: 10 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
run_test('test_bcf2vcf',$opts);
run_test('test_vcf_sweep',$opts,out=>'test-vcf-sweep.out');
run_test('test_vcf_various',$opts);
run_test('test_vcf_44', $opts);
run_test('test_bcf_sr_sort',$opts);
run_test('test_bcf_sr_no_index',$opts);
run_test('test_bcf_sr_range', $opts);
Expand Down Expand Up @@ -1159,6 +1160,15 @@ sub test_vcf_various
cmd => "$$opts{path}/test_view $$opts{path}/modhdr.vcf.gz chr22:1-2");
}

sub test_vcf_44
{
    my ($opts, %args) = @_;

    # VCF 4.4 input whose GT fields combine implicit and explicit phasing
    # markers; test_cmd is handed vcf44_1.expected as the expected output.
    my $input   = "$$opts{path}/vcf44_1.vcf";
    my $command = "$$opts{bin}/htsfile -c $input";

    test_cmd($opts, %args,
             out => "vcf44_1.expected",
             cmd => $command);
}

sub write_multiblock_bgzf {
my ($name, $frags) = @_;

Expand Down
136 changes: 135 additions & 1 deletion test/test_kstring.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* test_kstring.c -- kstring unit tests

Copyright (C) 2018, 2020, 2024 Genome Research Ltd.
Copyright (C) 2018, 2020, 2024-2025 Genome Research Ltd.

Author: Rob Davies <rmd@sanger.ac.uk>

Expand Down Expand Up @@ -451,6 +451,134 @@ static int test_kgetline2(void) {
return EXIT_SUCCESS;
}

/*
 * Unit test for kinsert_char().
 *
 * Part 1 inserts 'X' into "0123" at every position listed in tdata, which
 * includes one position on each side of the valid range. Entries whose
 * expected value is NULL must be rejected by kinsert_char().
 * Part 2 appends characters one at a time to an initially empty kstring and
 * compares it with a reference built by kputc().
 *
 * Returns 0 on success and -1 on failure (a message is written to stderr).
 */
static int test_kinsertchar(void) {
    kstring_t t = KS_INITIALIZE, res = KS_INITIALIZE;
    int i = 0;
    struct data {
        int pos;            /* insert position; -1 converts to a huge size_t */
        const char *val;    /* expected result, NULL if insert must fail */
    };

    const struct data tdata[] = {
        { -1, NULL }, { 0, "X0123" }, { 1, "0X123" }, { 2, "01X23" },
        { 3, "012X3" }, { 4, "0123X" }, { 5, NULL }
    };
    const int ncases = (int) (sizeof(tdata) / sizeof(tdata[0]));

    for (i = 0; i < ncases; ++i) {
        kstring_t s = KS_INITIALIZE;
        const char *expected = tdata[i].val;
        const char *err = NULL;

        if (kputs("0123", &s) < 0) {
            err = "kputs failed in kinsert_char test";
        } else if (kinsert_char('X', tdata[i].pos, &s) < 0) {
            if (expected)
                err = "kinsert_char failed";
        } else if (!expected) {
            /* Report this explicitly instead of relying on a comparison
             * against a shorter string to notice it. */
            err = "kinsert_char succeeded at an invalid position";
        } else if (s.s[s.l] != '\0') {
            err = "No NUL termination on string from kinsert_char";
        } else if (s.l != strlen(expected)
                   || memcmp(s.s, expected, s.l + 1) != 0) {
            /* Length is checked first so memcmp never reads past expected. */
            err = "kinsert_char comparison failed";
        }

        ks_free(&s);
        if (err) {
            fprintf(stderr, "%s\n", err);
            return -1;
        }
    }

    /* realloc checks: grow t one character at a time from empty */
    for (i = 0; i < 7; ++i) {
        const char *err = NULL;

        if (kputc('A' + i, &res) < 0) {
            err = "kputc failed in kinsert_char test";
        } else if (kinsert_char('A' + i, t.l, &t) < 0) {
            err = "kinsert_char failed in realloc";
        } else if (t.s[t.l] != '\0') {
            err = "No NUL termination on string from kinsert_char in realloc";
        } else if (memcmp(t.s, res.s, res.l + 1) != 0) {
            err = "kinsert_char realloc comparison failed in realloc";
        }

        if (err) {
            fprintf(stderr, "%s\n", err);
            ks_free(&res);
            ks_free(&t);
            return -1;
        }
    }

    ks_free(&t);
    ks_free(&res);
    return 0;
}

/*
 * Unit test for kinsert_str().
 *
 * Part 1 inserts "XYZ" into "0123" at every position listed in tdata, which
 * includes one position on each side of the valid range. Entries whose
 * expected value is NULL must be rejected by kinsert_str().
 * Part 2 appends one-character strings to an initially empty kstring and
 * compares it with a reference built by kputs().
 * Part 3 checks empty-string insertion at invalid and valid positions.
 *
 * Every failure path releases the kstrings it owns before returning.
 * Returns 0 on success and -1 on failure (a message is written to stderr).
 */
static int test_kinsertstr(void) {
    kstring_t t = KS_INITIALIZE, res = KS_INITIALIZE;
    int i = 0;
    size_t len_before = 0;
    const char *err = NULL;
    struct data {
        int pos;            /* insert position; -1 converts to a huge size_t */
        const char *val;    /* expected result, NULL if insert must fail */
    };

    const struct data tdata[] = {
        { -1, NULL }, { 0, "XYZ0123" }, { 1, "0XYZ123" }, { 2, "01XYZ23" },
        { 3, "012XYZ3" }, { 4, "0123XYZ" }, { 5, NULL }
    };
    const int ncases = (int) (sizeof(tdata) / sizeof(tdata[0]));

    for (i = 0; i < ncases; ++i) {
        kstring_t s = KS_INITIALIZE;
        const char *expected = tdata[i].val;

        if (kputs("0123", &s) < 0) {
            err = "kputs failed in kinsert_str test";
        } else if (kinsert_str("XYZ", tdata[i].pos, &s) < 0) {
            if (expected)
                err = "kinsert_str failed";
        } else if (!expected) {
            /* Report this explicitly instead of relying on a comparison
             * against a shorter string to notice it. */
            err = "kinsert_str succeeded at an invalid position";
        } else if (s.s[s.l] != '\0') {
            err = "No NUL termination on string from kinsert_str";
        } else if (s.l != strlen(expected)
                   || memcmp(s.s, expected, s.l + 1) != 0) {
            /* Length is checked first so memcmp never reads past expected. */
            err = "kinsert_str comparison failed";
        }

        ks_free(&s);    /* released on success and on every failure path */
        if (err)
            goto fail;
    }

    /* realloc checks: grow t one short string at a time from empty */
    for (i = 0; i < 15; ++i) {
        /* A stack buffer holds the one-character string, so there is
         * nothing extra to release on the error paths. */
        const char val[2] = { (char) ('A' + i), '\0' };

        if (kputs(val, &res) < 0) {
            err = "kputs failed in kinsert_str test";
        } else if (kinsert_str(val, t.l, &t) < 0) {
            err = "kinsert_str failed in realloc";
        } else if (t.s[t.l] != '\0') {
            err = "No NUL termination on string from kinsert_str in realloc";
        } else if (memcmp(t.s, res.s, res.l + 1) != 0) {
            err = "kinsert_str realloc comparison failed in realloc";
        }
        if (err)
            goto fail;
    }

    /* empty strings */
    ks_free(&t);
    if (kinsert_str("", 1, &t) == 0) {
        /* pos 1 is past the end of an empty kstring and must be rejected */
        err = "kinsert_str empty ins to invalid pos succeeded";
        goto fail;
    }
    if (kinsert_str("", 0, &t) || t.l != 0) {
        err = "kinsert_str empty insertion failed";
        goto fail;
    }
    len_before = res.l;
    if (kinsert_str("", 1, &res) || len_before != res.l) {
        err = "kinsert_str empty ins to valid pos failed";
        goto fail;
    }

    ks_free(&t);
    ks_free(&res);
    return 0;

fail:
    fprintf(stderr, "%s\n", err);
    ks_free(&t);
    ks_free(&res);
    return -1;
}

int main(int argc, char **argv) {
int opt, res = EXIT_SUCCESS;
int64_t start = 0;
Expand Down Expand Up @@ -500,5 +628,11 @@ int main(int argc, char **argv) {
if (!test || strcmp(test, "kgetline2") == 0)
if (test_kgetline2() != 0) res = EXIT_FAILURE;

if (!test || strcmp(test, "kinsertchar") == 0)
if (test_kinsertchar() != 0) res = EXIT_FAILURE;

if (!test || strcmp(test, "kinsertstr") == 0)
if (test_kinsertstr() != 0) res = EXIT_FAILURE;

return res;
}
35 changes: 35 additions & 0 deletions test/vcf44_1.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
##fileformat=VCFv4.4
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=1,length=1000>
##reference=file://test
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##failue="test file on explicit and implicit phasing markers in 4.4"
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
1 61462 rs56992750 T A 100 PASS . GT 0|0|1 0/1
1 61480 rs56992751 T A 100 PASS . GT 0 /0|1
1 61481 rs56992752 T A 100 PASS . GT /0 |0/1
1 61482 rs56992752 T A 100 PASS . GT /0 /1
1 61483 rs56992752 T A 100 PASS . GT 0 1
1 61484 rs56992752 T A 100 PASS . GT 0 /1
1 61485 rs56992752 T A 100 PASS . GT 0 1
1 61486 rs56992752 T A 100 PASS . GT 0 1
1 61487 rs56992752 T A 100 PASS . GT 0 1
1 61488 rs56992752 T A 100 PASS . GT 0 /1
1 61489 rs56992752 T A 100 PASS . GT /0 1
1 61490 rs56992752 T A 100 PASS . GT /0 1
1 61491 rs56992752 T A 100 PASS . GT /0 /1
1 61492 rs56992752 T A 100 PASS . GT /0|0 1/0
1 61493 rs56992752 T A 100 PASS . GT 0|0 |1/0
1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0
1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0
1 61496 rs56992752 T A 100 PASS . GT . .
1 61497 rs56992752 T A 100 PASS . GT . |.
1 61498 rs56992752 T A 100 PASS . GT ./1 .|1
1 61499 rs56992752 T A 100 PASS . GT ./1 .|1
1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1
1 61501 rs56992752 T A 100 PASS . GT 1/. 1|.
1 61502 rs56992752 T A 100 PASS . GT 1/. /1|.
1 61503 rs56992752 T A 100 PASS . GT |1/. 1|.
1 61504 rs56992752 T A 100 PASS . GT ./. .|.
1 61505 rs56992752 T A 100 PASS . GT ./. .|.
1 61506 rs56992752 T A 100 PASS . GT |./. /.|.
34 changes: 34 additions & 0 deletions test/vcf44_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
##fileformat=VCFv4.4
##contig=<ID=1,length=1000>
##reference=file://test
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##failue="test file on explicit and implicit phasing markers in 4.4"
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
1 61462 rs56992750 T A 100 PASS . GT 0|0|1 0/1
1 61480 rs56992751 T A 100 PASS . GT 0 /0|1
1 61481 rs56992752 T A 100 PASS . GT /0 |0/1
1 61482 rs56992752 T A 100 PASS . GT /0 /1
1 61483 rs56992752 T A 100 PASS . GT 0 1
1 61484 rs56992752 T A 100 PASS . GT 0 /1
1 61485 rs56992752 T A 100 PASS . GT 0 |1
1 61486 rs56992752 T A 100 PASS . GT |0 1
1 61487 rs56992752 T A 100 PASS . GT |0 |1
1 61488 rs56992752 T A 100 PASS . GT |0 /1
1 61489 rs56992752 T A 100 PASS . GT /0 1
1 61490 rs56992752 T A 100 PASS . GT /0 |1
1 61491 rs56992752 T A 100 PASS . GT /0 /1
1 61492 rs56992752 T A 100 PASS . GT /0|0 /1/0
1 61493 rs56992752 T A 100 PASS . GT |0|0 |1/0
1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0
1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0
1 61496 rs56992752 T A 100 PASS . GT . .
1 61497 rs56992752 T A 100 PASS . GT /. |.
1 61498 rs56992752 T A 100 PASS . GT ./1 .|1
1 61499 rs56992752 T A 100 PASS . GT /./1 |.|1
1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1
1 61501 rs56992752 T A 100 PASS . GT 1/. 1|.
1 61502 rs56992752 T A 100 PASS . GT /1/. /1|.
1 61503 rs56992752 T A 100 PASS . GT |1/. |1|.
1 61504 rs56992752 T A 100 PASS . GT ./. .|.
1 61505 rs56992752 T A 100 PASS . GT /./. |.|.
1 61506 rs56992752 T A 100 PASS . GT |./. /.|.
Loading