// // using System; using KiwiSystem; class bwt_testchip { static int counter = 0; [Kiwi.OutputWordPort(11, 0, "done_status")] public static int done_status; [Kiwi.HardwareEntryPoint()] public static void chip() { done_status = 100; // Running Console.WriteLine("Am I onchip {0}", Kiwi.inHardware()); if (false) { //ensure malloc before lassoo done_status = 200; // string whence2 = "testdata2.dat"; string whence = "testdata.dat"; Kiwi.Pause(); { System.IO.TextReader reader_old = System.IO.File.OpenText(whence2); int c = reader_old.Read(); while (c != -1) { char cc = (char) c; Console.WriteLine("char from file {0}", (char)cc); c = reader_old.Read(); Kiwi.Pause(); // break; } } Console.WriteLine("End of program 4"); Kiwi.Pause(); using (System.IO.TextReader reader = System.IO.File.OpenText(whence)) { int c = reader.Read(); while (c != -1) { char cc = (char) c; Console.WriteLine("char from file {0}", (char)cc); c = reader.Read(); Kiwi.Pause(); } } Kiwi.Pause(); } const int max_length = 1000 * 1000 * 10; // Arrays need to be constant length for Kiwi use. dnaclass dna = new dnaclass(max_length); dnaclass.dna_record recc = new dnaclass.dna_record(); string folder = "/home/djg11/d320/gjigsaw"; // string dna_file = "MRSA252.dna"; // string dna_file = "shortdna.dna"; string dna_file = "minichunk.dna"; // dnaclass.ReadDNAFile_bases(System.IO.Path.Combine(folder, dna_file), recc); dnaclass.ReadDNAFile_bases(dna_file, recc); Console.WriteLine("now make fwd"); fwd_bwt test_bwt = new fwd_bwt("mytest", recc.Bases, recc.length, '.', "shortdna.dna.idx.dat0"); // fwd_bwt test_bwt = new fwd_bwt("mytest", "testdata".ToCharArray(), 59945, '.', "shortdna.dna.idx.dat0"); Console.WriteLine("End of program 999"); done_status = 99; // Finished. } public static void Main (string []argv) { Console.WriteLine("Hello World"); chip(); Console.WriteLine("End of Main 999"); } } // using System; using KiwiSystem; class bwt_testchip { static int counter = 0; [Kiwi.OutputWordPort(11, 0, "done_status")] public static int done_status; [Kiwi.HardwareEntryPoint()] public static void chip() { done_status = 100; // Running Console.WriteLine("Am I onchip {0}", Kiwi.inHardware()); if (false) { //ensure malloc before lassoo done_status = 200; // string whence2 = "testdata2.dat"; string whence = "testdata.dat"; Kiwi.Pause(); { System.IO.TextReader reader_old = System.IO.File.OpenText(whence2); int c = reader_old.Read(); while (c != -1) { char cc = (char) c; Console.WriteLine("char from file {0}", (char)cc); c = reader_old.Read(); Kiwi.Pause(); // break; } } Console.WriteLine("End of program 4"); Kiwi.Pause(); using (System.IO.TextReader reader = System.IO.File.OpenText(whence)) { int c = reader.Read(); while (c != -1) { char cc = (char) c; Console.WriteLine("char from file {0}", (char)cc); c = reader.Read(); Kiwi.Pause(); } } Kiwi.Pause(); } const int max_length = 1000 * 1000 * 10; // Arrays need to be constant length for Kiwi use. dnaclass dna = new dnaclass(max_length); dnaclass.dna_record recc = new dnaclass.dna_record(); string folder = "/home/djg11/d320/gjigsaw"; // string dna_file = "MRSA252.dna"; // string dna_file = "shortdna.dna"; string dna_file = "minichunk.dna"; // dnaclass.ReadDNAFile_bases(System.IO.Path.Combine(folder, dna_file), recc); dnaclass.ReadDNAFile_bases(dna_file, recc); Console.WriteLine("now make fwd"); fwd_bwt test_bwt = new fwd_bwt("mytest", recc.Bases, recc.length, '.', "shortdna.dna.idx.dat0"); // fwd_bwt test_bwt = new fwd_bwt("mytest", "testdata".ToCharArray(), 59945, '.', "shortdna.dna.idx.dat0"); Console.WriteLine("End of program 999"); done_status = 99; // Finished. } public static void Main (string []argv) { Console.WriteLine("Hello World"); chip(); Console.WriteLine("End of Main 999"); } } // eof // // using System.IO; using System; using gjig_types; public class resvec { public int start, end, edits, matches, first_match; string status = ""; public string print() { return "resvec: " + status + " start=" + start + ", end=" + end + ", edits=" + edits + ", matches=" + matches + ", at position " + first_match; } } public class bowtie { const int e_limit = 4; const int max_short_read_len = 321; char [] the_query; fwd_bwt the_bwt; // Note: we have a 'forwards BWT' whether we are scanning forwards or backwards. We need a 'reverse BWT' for scanning backwards (perversely that is from the start of the short read owing to BWT being based on suffixes in its standard form). int qlen; public int idx; public bool fwds; class frame { public int errors_so_far; public int from, to; public int sx; public void set0(int f, int t) { from = f; to = t; sx = 0; errors_so_far = 0; } }; frame [] Frames = new frame [max_short_read_len]; public bowtie(bool dir, fwd_bwt b) // constructor { fwds = dir; the_bwt = b; for (int i=0; i= 100 && ff.sx < 104) { ff.sx ++; // Try next alternative at this point ... char new_c = alt(ff.sx - 100); if (ccc == new_c) return 0; // skip retest of originally tried char. ccc = new_c; } int start = ff.from, end = ff.to; if (v1) Console.Write("pre char '{0}' (was '{4}') idx={1} in range {2}..{3}", ccc, idx, start, end, the_query[idx]); bool fok = the_bwt.rpair_step(ref start, ref end, ccc); if (v1) Console.WriteLine(" fok={4} bowtie search new range {1}..{2} sx={5} edits={7}", ccc, start, end-1, idx, fok, ff.sx, the_query[idx], ff.errors_so_far); if (fok) { int edits = 0; if (ff.sx > 0 && the_query[idx] != 'n') edits = 1; // We have succeeded in overcoming an error. int new_errors = ff.errors_so_far + edits; if (new_errors > error_limit) { if (v1) Console.WriteLine("Alternate char matched but we have now exceeded local error limit {0} > {1}", new_errors, error_limit); return -2; } int end_point = (stop_at >= 0) ? stop_at : fwds ? 0 : qlen-1; if (idx == end_point) { if (v1) Console.WriteLine("Char found. Reached stop-at point idx==endpoint={4}. Matches {3} times at {1}..{2} errors={0}", new_errors, the_bwt.decode(start), the_bwt.decode(end), end-start, end_point); return +1; } int nidx = (fwds) ? idx - 1 : idx +1; // Advance once char. frame nff = Frames[nidx]; nff.sx = 0; nff.from = start; nff.to = end; nff.errors_so_far = new_errors; idx = nidx; return 0; } else if (ff.errors_so_far <= error_limit)// Now try loose matching - might inc/log the error here? { if (ff.sx == 0) ff.sx = 100; if (v1) Console.WriteLine("did not match - will try alternatives"); return -1; // No progress but just call me again. } else { if (v1) Console.WriteLine("abort -2"); if (v1) Console.WriteLine("No match. Now exceeded local error limit {0} > {1}", ff.errors_so_far, error_limit); return -2; // No match possible } } public void readout(resvec rr) { rr.edits = Frames[idx].errors_so_far; rr.start = Frames[idx].from; rr.end = Frames[idx].to; rr.matches = rr.end - rr.start; rr.first_match = the_bwt.decode(rr.start); // if (v1) Console.WriteLine("Char found. Reached stop-at point idx==endpoint={4}. Matches {3} times at {1}..{2} errors={0}", new_errors, the_bwt.decode(start), the_bwt.decode(end), rr.end-start, end_point); } public bool at_start() // At place where we start the suffix search (i.e. end of the search string). { if ((fwds && idx == qlen-1) || (!fwds && idx == 0)) return true; return false; } public bool at_end() { if ((!fwds && idx == qlen-1) || (fwds && idx == 0)) return true; return false; } public void backstep() // If forwards, then go back one step, perversely by advancing the index, since BWT deals in suffixes. { idx = (fwds) ? idx+1: idx-1; trigger_backtrack(); // may not want this in here. } } public class fastq_read { public System.IO.StreamWriter spoolfile; fwd_bwt the_bwt, the_bwt_rev; //char [] the_query; bowtie bow_fwd, bow_rev; void one_sided_lookup__(string name, int no, char [] query, int qlen) { if (bow_fwd != null) bow_fwd.backtracking_scan_start(query, qlen); if (bow_rev != null) bow_rev.backtracking_scan_start(query, qlen); bowtie bow = (bow_fwd != null) ? bow_fwd: bow_rev; int rc; while (true) { rc = bow.search_scan_step(0, -1); if (rc == -2) { if (bow.at_start()) { Console.WriteLine("Not found."); break; // Cannot backtrack further. } bow.backstep(); } if (rc == 1) break; if (rc == 0) continue; } resvec results = new resvec(); bow.readout(results); if (rc == 1) { Console.WriteLine("{5} no {4} YES FOUND SHORT READ {3} {0} {1} {2} edits={6}", query, results.start, results.end, query, no, name, results.edits); if(spoolfile != null) spoolfile.WriteLine("{5} no {4} YES FOUND SHORT READ {3} {0} {1} {2} edits={6}", query, results.start, results.end, query, no, name, results.edits); } else { Console.WriteLine("{6} no {5} NOT FOUND SHORT READ {4} {0} {1} {2} rc={3}", query, results.start, results.end, rc, query, no, name); if(spoolfile != null) spoolfile.WriteLine("{6} no {5} NOT FOUND SHORT READ {4} {0} {1} {2} rc={3}", query, results.start, results.end, rc, query, no, name); } } void bowties_lookup(string name, int no, char [] query, int qlen, int bowtie_error_limit) { Console.WriteLine("Start lookup {0} no {1}", name, no); if (spoolfile != null) spoolfile.WriteLine("Start lookup {0} no {1}", name, no); bow_fwd.backtracking_scan_start(query, qlen); bow_rev.backtracking_scan_start(query, qlen); int rc_fwd, rc_rev; while (true) { rc_fwd = bow_fwd.search_scan_step(0, -1); if (rc_fwd != 0) break; } if (bow_fwd.at_end()) { resvec results = new resvec(); bow_fwd.readout(results); Console.WriteLine("{0} no {1}: Perfect match on fwds scan. {2}", name, no, results.print()); if (spoolfile != null) spoolfile.WriteLine("{0} no {1}: Perfect match on fwds scan. {2}", name, no, results.print()); return; } while (true) { rc_rev = bow_rev.search_scan_step(0, -1); if (rc_rev != 0) break; } if (bow_rev.at_end()) { resvec results = new resvec(); bow_fwd.readout(results); if (spoolfile != null) spoolfile.WriteLine("{0} no {1}: Perfect match on rev scan. {2}", name, no, results.print()); Console.WriteLine("{0} no {1}: Perfect match on rev scan. {2}", name, no, results.print()); } if (spoolfile != null) spoolfile.WriteLine("Stop phase 1 FWD rc={0} idx={1}; REV rc={2} idx={3}", rc_fwd, bow_fwd.idx, rc_rev, bow_rev.idx); Console.WriteLine("Stop phase 1 FWD rc={0} idx={1}; REV rc={2} idx={3}", rc_fwd, bow_fwd.idx, rc_rev, bow_rev.idx); int bow_start = bow_fwd.idx; int bow_end = bow_rev.idx; if (bow_fwd.idx <= bow_rev.idx) { if (spoolfile != null) spoolfile.WriteLine("bowtie index overlap - no further searching needed. {0}..{1}", bow_end, bow_start); Console.WriteLine("bowtie index overlap - no further searching needed. {0}..{1}", bow_end, bow_start); return; } if (spoolfile != null) spoolfile.WriteLine("bowtie index no overlap - further searching in interval {0}..{1}", bow_end, bow_start); Console.WriteLine("bowtie index no overlap - further searching in interval {0}..{1}", bow_end, bow_start); while (true) // Main inner scan loop { int rc = bow_fwd.search_scan_step(bowtie_error_limit, bow_end); if (rc == -2) { if (bow_fwd.idx == bow_start) // { if (spoolfile != null) spoolfile.WriteLine("Not found in phase 2."); Console.WriteLine("Not found in phase 2."); break; // Cannot backtrack further. } bow_fwd.backstep(); } if (rc == 1) break; // found } if (spoolfile != null) spoolfile.WriteLine("Stop phase 2 FWD rc={0} idx={1}; REV rc={2} idx={3}", rc_fwd, bow_fwd.idx, rc_rev, bow_rev.idx); Console.WriteLine("Stop phase 2 FWD rc={0} idx={1}; REV rc={2} idx={3}", rc_fwd, bow_fwd.idx, rc_rev, bow_rev.idx); resvec results1 = new resvec(); bow_fwd.readout(results1); results1.first_match -= bow_end; if (spoolfile != null) spoolfile.WriteLine("{0} no {1}: finished search phase 2 scan. {2}", name, no, results1.print()); Console.WriteLine("{0} no {1}: finished search phase 2 scan. {2}", name, no, results1.print()); } public fastq_read(fwd_bwt the_bwt1, fwd_bwt the_bwt_rev1) // constructor { the_bwt_rev = the_bwt_rev1; the_bwt = the_bwt1; } public void test(string name, int test_no, string test_needle) { Console.WriteLine("\n\n"); Console.WriteLine("{0} no {1} start test.", name, test_no); // index = ind; bow_fwd = new bowtie(true, the_bwt); bow_rev = new bowtie(false, the_bwt_rev); bowties_lookup(name, test_no, test_needle.ToCharArray(), test_needle.Length, 5); Console.WriteLine("{0} no {1} done test.", name, test_no); Console.WriteLine("=========================================\n\n\n\n\n"); } public void reader(string folder, string fastq_file) // { string spoolfilename = "rboth"; spoolfile = (spoolfilename=="") ? null: new System.IO.StreamWriter(spoolfilename); bow_rev = new bowtie(false, the_bwt); bow_fwd = new bowtie(true, the_bwt); System.Console.WriteLine("fastq_read {0} {1}", folder, fastq_file); ReadFile1(System.IO.Path.Combine(folder, fastq_file)); // koo: File read in 6 seconds . 607067609/6 = 607 067 609 = 100 MBytes/second. // wc takes 11 seconds to process the same file // wc: 21385556 21385556 754,571,544 8788_1#10_1_clipped.fastq } public void ReadFile1(string filePath) { int line_no = 0; int short_reads = 0; int seeds = 0; System.IO.StreamReader file = new System.IO.StreamReader(filePath); const int max_sr_len = 321; // TODO repeated char [] short_read = new char [max_sr_len]; int [] quality = new int [max_sr_len]; bool errored = false; while (!errored) { string [] lines = new string [4]; int d; for (d=0; d<4; d++) // fastq has quadline entries { if ((lines[d] = file.ReadLine()) == null) break; line_no += 1; } if (d == 0) break; if (d != 4) { Console.WriteLine("Abort malformed fastq quadline: not a multiple of four lines in length"); break; } if (lines[0][0] != '@') { Console.WriteLine("Expected @ sign on line {0} of {1}", line_no, filePath); break; } string rd = lines[1]; int len = 0; for (int c=0; c': incomment = true; break; case '\n': case '\r': // incomment = false; break; //case '\t': case ' ': continue; default: // if (incomment) continue; Console.WriteLine("Failed - invalid char 0x{0:x} {2}:{1}", (int)cc, line_no, filePath); errored = true; break; } if (len == max_sr_len) { Console.WriteLine("Short read {0}:{1} is long!", short_reads, rd); Environment.Exit(1); } if (d>=0) short_read[len++] = Char.ToLower(cc); } string qual = lines[3]; if (qual.Length != rd.Length) { Console.WriteLine("Short read {0}:{1} has wrong length quality string {2}", short_reads, rd, qual); Environment.Exit(1); } for (int c=0; c= '~') { Console.WriteLine("Short read {0}:{1} has bad char in its quality string {2}", short_reads, rd, qual); Environment.Exit(1); } quality[c] = (int)ccq - (int)'!' + 1; } System.Console.WriteLine("\n\nStart on read No {0} len={1} {2}", short_reads, len, rd); bowties_lookup("fastq", short_reads, short_read, len, 5) ; short_reads ++; } if (errored) Console.WriteLine("ERRORS ENCOUNTERED"); Console.WriteLine("Read {0} short reads containing {3} seeds from {2}:{1}", short_reads, line_no, filePath, seeds); file.Close(); } } // eof using System; using KiwiSystem; // // CBG Squanderer: Burrows-Wheeler Transform and String Searcher in C#. // (C) 2009-14 David J Greaves, University of Cambridge Computer Laboratory. // With decimated rank index for linear space lookup. public class fwd_bwt { const int max_length = 1000 * 1000 * 10; // Arrays need to be constant length for Kiwi use. const bool nofloat = true; // for now bool sanity_check_enable = !Kiwi.inHardware(); // Omit checks for FPGA implementation. char eos; // Terminator. string name; char [] string_chars; int [] ptrs; int length; static int [] tots = new int [256]; // Character occurance counts static int [] Tots_before = new int [256]; int alphabet_size = 0; static int [] compress_map = new int[256]; // Mapping of each char in use to a small integer int rank_decimation_factor = 1; int compress(char cc, bool insert_if_missing=false) // Map each character in use to a small integer in the range 0 to alphabet_size-1. { if (cc == eos) { Console.WriteLine("EOS char {0} appears in needle (search string).", eos); //FOR NOW if (!Kiwi.inHardware()) Console.WriteLine("MST: {0}", System.Environment.StackTrace); Environment.Exit(1); } if (compress_map[cc]==0) // The mapping holds that integer + 1. { if (!insert_if_missing) return -1; compress_map[cc] = ++ alphabet_size; } return compress_map[cc] - 1; } int bound_log2(int n) { int r = 1; while (r < n) r *= 2; return r; } const int max_asize = 5; static int [,] Ranks = new int [max_length, max_asize]; static int [] lranks = new int [max_asize]; //alphabet_size]; void compute_ranks(bool disable_decimate=false) // Compute rank at every position - can be too memory hungry for large alphabets but is ok for DNA. { Console.WriteLine ("BWT: Starting Alphabet Arity Find."); for (int viz=0; viz 5 * alphabet_size) rank_decimation_factor = bound_log2(alphabet_size); Console.WriteLine ("BWT: Starting Ranking with decimation_factor {0}.", rank_decimation_factor); Console.WriteLine("Exit here\n"); return; //Please can we have a much better error on this sort of thing // Ranks = new int [(length + rank_decimation_factor -1) / rank_decimation_factor, alphabet_size]; int o=0; // int [] lranks = new int [max_asize]; //alphabet_size]; for (int x=0; x 100) { Console.WriteLine(" ... and so on"); break; } string ranks = ""; if (alphabet_size < 8) { int rem = row % rank_decimation_factor; if (rem == 0) { int r0 = row / rank_decimation_factor; ranks = "ranks="; for (int xx= alphabet_size-1; xx >= 0; xx--) ranks += Ranks[r0, xx] + " "; } } Console.WriteLine(" hay= {4} sort= {2} bwt= {3} {0} {1} {5}", row, ptrs[row], string_chars[ptrs[row]], bwt(row), string_chars[row], ranks); // print sorted and bwt } } int ranks(int row, int xx) // lookup { if (row < 0 || row >= length) { Console.WriteLine("row {0} is out of range.", row); Console.WriteLine("MST: {0}", System.Environment.StackTrace); Environment.Exit(1); } int r0 = row / rank_decimation_factor; int rem = row % rank_decimation_factor; int ans = Ranks[r0, xx]; if (rem != 0) { r0 *= rank_decimation_factor; while (rem != 0) { rem --; r0 ++; // Console.WriteLine("row find {3}: scan {0} {1} ans={2}", rem, r0, ans, row); char c1 = bwt(r0); if (c1 != eos && compress(c1) == xx) ans ++; // Nasty: calling compress in the inner loop } } return ans; } public void rpair_init(ref int start, ref int end) { start = 1; end = length; } public bool rpair_step(ref int start, ref int end, char ccc) { if (start == end) return false; int xx = compress(ccc); if (xx < 0) { Console.WriteLine("{0} is not in haystack alphabet, so string not found.", ccc); return false; } int nn = Tots_before [ccc]; Console.WriteLine(" {1}th letter of compressed alphabet, {0} tots before", nn, xx); start = nn + ranks(start-1, xx); end = nn + ranks(end-1, xx); return (end > start); } public int find_string(char [] query) // A basic search function { int start=0, end=0; rpair_init(ref start, ref end); for (int idx = query.Length-1; idx >= 0; idx --) { char ccc = query[idx]; Console.WriteLine("Search {3}th char, '{0}', in the range {1}..{2} ", ccc, start, end-1, idx); int xx = compress(ccc); if (xx < 0) { Console.WriteLine("{0} is not in haystack alphabet, so string not found.", ccc); return -1; } int nn = Tots_before [ccc]; Console.WriteLine(" {1}th letter of compressed alphabet, {0} tots before", nn, xx); start = nn + ranks(start-1, xx); end = nn + ranks(end-1, xx); Console.WriteLine(" will need to look in region starting {0} with range {1}..{2}", nn, start, end-1); if (start == end) { Console.WriteLine("Suffix not found"); return -1; } } Console.WriteLine("Search done in {0}..{1}, {2} instances found.", start, end-1, end-start); return ptrs[start]; // TODO: Could return the range of occurrences using (map ptrs [start .. end-1]). } public int decode(int x) // Map final range back to haystack characters. { return ptrs[x]; } public fwd_bwt(string name, char [] data, char terminator) // constructor 1 of 2 { fwd_bwt_ctorworx(name, data, data.Length, terminator, ""); } public fwd_bwt(string name, char [] data, int len, char terminator, string cachefile) // constructor 2 of 2 { fwd_bwt_ctorworx(name, data, len, terminator, cachefile); } void sanity_checker() { bool errors = false; for (int i=0; i cb) { /*Console.WriteLine(" stop {0} > {1} true => false", ca, cb); */ans = false; break; } a = (a == length-1) ? 0: a+1; b = (b == length-1) ? 0: b+1; } // Strings are the same if (x==length) ans = false; // Console.WriteLine("is_lower {0} {1} {2} ", ls, rs, ans); return ans; } bool is_higher(int a, int b) { // Console.WriteLine(" is_higher {0} {1} start", a, b); for (int x=0; x cb) return true; else if (ca < cb) return false; a = (a == length-1) ? 0: a+1; b = (b == length-1) ? 0: b+1; } // Strings are the same return false; } void quicksort(int low, int high) { //Console.WriteLine ("quicksort {0}..{1}", low, high); if (high - low < 7) bubblesort(low, high); else if (low < high) { int pivot_loc = partition(low, high); quicksort(low, pivot_loc); //Console.WriteLine("Done low {0}", pivot_loc); quicksort(pivot_loc+1, high); //Console.WriteLine("Done high {0}", pivot_loc); } } void bubblesort(int low, int high) { // Console.WriteLine ("bubblesort {0}..{1}", low, high); for (int i=low; i= high) return low; while (low < high) { while (low < high) { if (sorter_is_lower(ptrs[low], pivot)) low ++; else break; } while (low < high) { if (is_higher(ptrs[high], pivot)) high --; else break; } if (low < high) swap(low, high); low ++; high --; } return low; } void swap(int a, int b) { int temp = ptrs[a]; ptrs[a] = ptrs[b]; ptrs[b] = temp; } public int FRead(System.IO.TextReader fd, char [] arg) // A garbage free reader. { int p = 0; while(true) { int c = fd.Read(); //Console.WriteLine(" t={1} line char {0:X}", c, Kiwi.tnow); arg[p++] = (char)c; if (c == -1 || (char)c == '\n') break; } Console.WriteLine(" line Read done at {0}", Kiwi.tnow); return p; } public static Int32 intParse(char [] c) { int r = 0; for (int i=0; true/*i= length) { Console.WriteLine("Cached data item out of range: {0} cf {1} in {2}", d, target_length, whence); return -1; } if (read_ptrs >= target_length) { Console.WriteLine("Too many cached data items: read={0} cf target={1} in {2}", read_ptrs, target_length, whence); return -1; } else ptrs[read_ptrs++] = d; continue; } if (cc == '%') { Console.WriteLine("Percentage comment line at {0}.", read_ptrs); FRead(reader, pqline);// reader.ReadLine(); c = reader.Read(); continue; } if (cc == 'M') { FRead(reader, pqline);// reader.ReadLine(); c = reader.Read(); continue; } if (cc == 'L') { // old toread_len = int.Parse(reader.ReadLine()); FRead(reader, pqline); c = reader.Read(); toread_len = intParse(pqline); if (toread_len != target_length) { Console.WriteLine("Target length wrong {0} cf {1} in {2}", toread_len, target_length, whence); return -1; } continue; } Console.WriteLine("Malformed cache file, unexpected char {0} after {1} in {2}", cc, read_ptrs, whence); return -1; } } Console.WriteLine("BWT: deserialised complete"); return 0; } public int serialise_save(string towhere) { System.IO.StreamWriter file = new System.IO.StreamWriter(towhere); file.WriteLine("%% sort of {0}", name); // comment file.WriteLine("M0"); // format file.WriteLine("L{0}", length); // length record for (int i=0; i {1}", c0, c, where); return c; } // Generate a test string by applying some edits to a fragment projection from the haystack. public string synthetic_needle(int origin, int len, int wilds, int edits, int inserts) { string ans = ""; int ex = 1; for (int i=0; i 0 && i%4 == 0 && i/4 >= 10 && i/4 <= wilds+10) c = 'n'; if (edits > 0 && i%4 == 2 && i/4 >= 10 && i/4 <= edits+10) c = transpose(i, c, ex++); ans = ans + c; } return ans; } } public static class bwt_test { static string lyrics = @" (C) Northern Songs 196X. She loves you, yeah, yeah, yeah She loves you, yeah, yeah, yeah She loves you, yeah, yeah, yeah, yeah You think you lost your love When I saw her yesterday It's you she's thinking of And she told me what to say She says she loves you And you know that can't be bad Yes, she loves you And you know you should be glad She said you hurt her so She almost lost her mind And now she says she knows You're not the hurting kind She says she loves you And you know that can't be bad Yes, she loves you And you know you should be glad, ooh She loves you, yeah, yeah, yeah She loves you, yeah, yeah, yeah And with a love like that You know you should be glad You know it's up to you I think it's only fair Pride can hurt you too Apologize to her Because she loves you And you know that can't be bad Yes, she loves you And you know you should be glad, ooh She loves you, yeah, yeah, yeah She loves you, yeah, yeah, yeah With a love like that You know you should be glad With a love like that You know you should be glad With a love like that You know you should be glad Yeah, yeah, yeah Yeah, yeah, yeah, yeah.\u0003"; static public void bwt_verbose_test(string needle0, string needle1, string haystack) { fwd_bwt bwt = new fwd_bwt("verbose_test", haystack.ToCharArray(), '$'); bwt.print_stats(); Console.WriteLine("Start search test string1."); int l0 = bwt.find_string (needle0.ToCharArray()); Console.WriteLine("Found string {0} at {1}", needle0, l0); int l1 = bwt.find_string (needle1.ToCharArray()); Console.WriteLine("Found string {0} at {1}", needle1, l1); } static public void bwt_beatles_test() { string needle0 = "love"; string needle1 = "told"; fwd_bwt bwt = new fwd_bwt("beatles_test", lyrics.ToCharArray(), '\u0003'); bwt.print_stats(); Console.WriteLine("Start search test string1."); int l0 = bwt.find_string (needle0.ToCharArray()); Console.WriteLine("Found Beatles string {0} at {1}", needle0, l0); int l1 = bwt.find_string (needle1.ToCharArray()); Console.WriteLine("Found Beatles string {0} at {1}", needle1, l1); } } // eof