Code written in C++. Recently, a research team collected genetic samples from ar
ID: 639076 • Letter: C
Question
Code written in C++.
Recently, a research team collected genetic samples from around the Great Smoky Mountains in search of new species. You've enlisted to help the team by making a database of the genetic sequences. For this lab, you get to parse the input and store it in DNA objects (in preparation for inserting the information into a linked list). The parsing will be done in a class called SequenceDatabase. A description of these classes follows:
class DNA:
This class represents a single DNA sequence and should contain:
Appropriate constructor(s)
Data members to store:
Label
Accession ID (which is unique)
Sequence
Length of the sequence
Index of the coding region (or -1 if not applicable)
A print() method that prints the above information (used in lab1)
Appropriate "get" and "set" methods
class SequenceDatabase:
This class should contain:
Appropriate constructor(s)
Method to process commands from a specified file. Commands are as follows (fields are separated by tabs):
D <label> <ID> <sequence> <length> <index> (allocates memory for a new DNA object, which in lab1 will be added to a linked list; for now, allocate memory and print out "Adding <ID> ...", where <ID> is the accession ID number (see the example output below))
O <ID> (in lab1, obliterates the specified DNA entry; for now, print out "Obliterating <ID> ...")
P <ID> (in lab1, prints the specified DNA entry; for now, print out "Printing <ID> ...")
S (in lab1, displays the number of DNA entries; for now, print out "Entries: NYI")
Driver file
#include <iostream>
#include <string>
using namespace std;
// notice the first letter is a lower case s
#include "sequenceDatabase.h"
/**
 * @brief Driver: asks for a commands filename on STDIN and imports it.
 *
 * The user is prompted for a filename. Pressing return on an empty line (or
 * a failed read, e.g. EOF) keeps the default filename; any other input line
 * replaces it. The chosen file is then passed to
 * SequenceDatabase::importEntries().
 *
 * @return 0 always.
 */
int main( /*int argc, char argv[] */ ){
    string commandsFilename = "lab0-commands-short.tab";

    cout << "Please enter the commands filename (or simply press return to use " << commandsFilename << ") ";

    // Read the whole line in one go. Looking at a single character and then
    // extracting with >> would block on an empty line and on one-character
    // filenames, because >> skips the newline and waits for more input.
    string stdinFilename;
    if( getline( cin, stdinFilename) && !stdinFilename.empty()){
        // replace the default filename
        commandsFilename = stdinFilename;
    }

    SequenceDatabase entries; // use SequenceDatabase entries{ }; for C++ 11
    cout << "Importing " << commandsFilename << endl;
    entries.importEntries( commandsFilename);
    return 0;
}
Data File
D taxon1 12345 agtcgatcagaagatctcct 20 -1
P 12345
O 12345
S
P 9999
O 9999
Explanation / Answer
#define BASE_MASK 0x3 /* binary: 11 */

/* 2-bit codes used to pack one DNA base */
enum
{
    BASE_A = 0x0, /* binary: 00 */
    BASE_C = 0x1, /* binary: 01 */
    BASE_G = 0x2, /* binary: 10 */
    BASE_T = 0x3, /* binary: 11 */
};

/**
 * @brief A DNA sequence packed at two bits per base (four bases per byte).
 *
 * Within each byte the first base occupies the two most significant bits.
 * Only the upper-case letters 'A', 'C', 'G' and 'T' are accepted.
 * The object owns its buffer; copies are deep and moves leave the source as
 * an empty sequence.
 */
class dna_bitset
{
public:
    /**
     * @brief constructor
     * @param dna_str a string containing a DNA sequence; only the first
     *                dna_len characters are read, so it need not be
     *                null-terminated
     * @param dna_len length of the DNA sequence
     * @throws std::invalid_argument if dna_str is null while dna_len is
     *         non-zero, or if a character is not one of 'A', 'C', 'G', 'T'
     */
    dna_bitset (const char* dna_str, const size_t dna_len)
        : m_data(nullptr), m_len(dna_len)
    {
        if (dna_str == nullptr && dna_len != 0)
        {
            throw std::invalid_argument("null DNA sequence");
        }

        /* bytes necessary to store dna_str as a bitset */
        const size_t dna_bytes = byte_count(dna_len);
        m_data = new uint8_t[dna_bytes];
        std::memset(m_data, 0, dna_bytes);

        /* for each base of the DNA sequence */
        for (size_t i = 0; i < dna_len; i++)
        {
            uint8_t code = 0;
            switch (dna_str[i])
            {
                case 'A': code = BASE_A; break;
                case 'C': code = BASE_C; break;
                case 'G': code = BASE_G; break;
                case 'T': code = BASE_T; break;
                default:
                    /* the destructor does not run when a constructor
                       throws, so release the buffer here */
                    delete[] m_data;
                    throw std::invalid_argument("invalid DNA base");
            }
            /* base 0 of each byte goes in bits 7-6, base 3 in bits 1-0 */
            const unsigned shift = 6 - 2 * (i % 4);
            m_data[i/4] |= static_cast<uint8_t>(code << shift);
        }
    }

    /**
     * @brief copy constructor (deep copy of the packed buffer)
     */
    dna_bitset (const dna_bitset& other)
        : m_data(new uint8_t[byte_count(other.m_len)]), m_len(other.m_len)
    {
        const size_t dna_bytes = byte_count(m_len);
        if (dna_bytes != 0)
        {
            std::memcpy(m_data, other.m_data, dna_bytes);
        }
    }

    /**
     * @brief move constructor; leaves other as an empty sequence
     */
    dna_bitset (dna_bitset&& other) noexcept
        : m_data(other.m_data), m_len(other.m_len)
    {
        other.m_data = nullptr;
        other.m_len = 0;
    }

    /**
     * @brief copy assignment (deep copy; unchanged if the copy throws)
     */
    dna_bitset& operator= (const dna_bitset& other)
    {
        if (this != &other)
        {
            dna_bitset tmp(other);
            swap(tmp);
        }
        return *this;
    }

    /**
     * @brief move assignment; leaves other as an empty sequence
     */
    dna_bitset& operator= (dna_bitset&& other) noexcept
    {
        if (this != &other)
        {
            delete[] m_data;
            m_data = other.m_data;
            m_len = other.m_len;
            other.m_data = nullptr;
            other.m_len = 0;
        }
        return *this;
    }

    /**
     * @brief destructor
     */
    ~dna_bitset ()
    {
        delete[] m_data;
    }

    /**
     * @brief returns the stored DNA sequence as a string
     * @return a newly allocated, null-terminated string of m_len characters;
     *         the caller owns it and must release it with delete[]
     */
    char* to_string () const
    {
        /* indexed by the 2-bit code: BASE_A, BASE_C, BASE_G, BASE_T */
        static const char base_chars[] = { 'A', 'C', 'G', 'T' };

        char* dna_str = new char[m_len+1];
        /* for each base of the DNA sequence */
        for (size_t i = 0; i < m_len; i++)
        {
            const unsigned shift = 6 - 2 * (i % 4);
            /* get the i-th DNA base; the mask keeps the index within 0..3 */
            dna_str[i] = base_chars[(m_data[i/4] >> shift) & BASE_MASK];
        }
        dna_str[m_len] = '\0';
        return dna_str;
    }

private:
    /* number of bytes needed to hold dna_len bases at four bases per byte */
    static size_t byte_count (const size_t dna_len)
    {
        return (dna_len / 4) + (dna_len % 4 != 0);
    }

    /* exchanges buffer and length with other */
    void swap (dna_bitset& other) noexcept
    {
        uint8_t* const data = m_data;
        m_data = other.m_data;
        other.m_data = data;

        const size_t len = m_len;
        m_len = other.m_len;
        other.m_len = len;
    }

    uint8_t* m_data;
    size_t m_len;
};