Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

I have the following string: ORF= 'MGRRGQEAATQQGVRCTFLKRSSLGHVLKVTSSLWPSQNLSLRT…

ID: 3850901 • Letter: I

Question


I have the following string: ORF= 'MGRRGQEAATQQGVRCTFLKRSSLGHVLKVTSSLWPSQNLSLRTVLASAAPRYIRGWARSSLHSPLKQMPRSPFLHPHLMTADSSVLLSKVLGDLGSQGAPRCLPLGEHPITPGGGRGCLPEWARPLSPGLTAAP*MWALGPVGPHPVWVTLPLTWVQPGWRWVGVRPRAGGQAGTVSP*ME*'

I want to split it into separate vectors so that each vector starts with 'M' and ends with '*', as shown:

ORF1= MGRRGQEAATQQGVRCTFLKRSSLGHVLKVTSSLWPSQNLSLRTVLASAAPRYIRGWARSSLHSPLKQMPRSPFLHPHLMTADSSVLLSKVLGDLGSQGAPRCLPLGEHPITPGGGRGCLPEWARPLSPGLTAAP*

ORF2= MWALGPVGPHPVWVTLPLTWVQPGWRWVGVRPRAGGQAGTVSP*

ORF3=ME*

Assume that the original string is unknown. How would this be done? Thank you.

Explanation / Answer

Assuming the given string always starts with 'M' and ends with '*' (i.e., it always contains at least one such segment), this can be done in C++ using vectors.

This also covers the corner case where another '*' appears after a segment has ended but before the next segment's 'M': the segment is extended to the latest '*' instead of being cut short.

#include <iostream>
#include<vector>
#include<string>
using namespace std;

// Splits `s` into segments that each begin at an 'M' and end at a '*'.
//
// Scanning left to right:
//   - the first 'M' seen while no segment is open starts a new segment;
//     further 'M's inside an open segment are ordinary characters;
//   - the first '*' after that 'M' closes the segment;
//   - any additional '*' that appears before the next segment starts
//     extends the just-closed segment up to that later '*'
//     (e.g. "MA*B*" yields the single segment "MA*B*");
//   - a '*' seen before any 'M' is ignored;
//   - a trailing 'M' that never gets a closing '*' is dropped.
//
// Returns the segments in order of appearance; the result is empty when
// `s` contains no complete M...* segment.
std::vector<std::string> findParts(std::string s){
    typedef std::string::size_type Index;

    std::vector<std::string> ans;
    bool open = false;     // inside a segment, still waiting for its first '*'
    bool pending = false;  // [first, last] is a closed segment not yet emitted
    Index first = 0;       // index of the segment's leading 'M'
    Index last = 0;        // index of the segment's (latest) closing '*'

    for (Index i = 0; i < s.size(); ++i) {
        const char c = s[i];
        if (c == 'M' && !open) {
            // A new segment starts here, so the previous one can no longer
            // be extended by later '*' characters: emit it now.
            if (pending) {
                ans.push_back(s.substr(first, last - first + 1));
                pending = false;
            }
            first = i;
            open = true;
        } else if (c == '*') {
            if (open || pending) {
                // Either closes the open segment or extends the closed one.
                last = i;
                open = false;
                pending = true;
            }
            // Otherwise no 'M' has been seen yet: there is nothing to close.
        }
    }

    // Emit the final closed segment. A segment that is still open here has
    // no terminating '*' and is intentionally left out.
    if (pending) {
        ans.push_back(s.substr(first, last - first + 1));
    }
    return ans;
}

int main() {
   vector<string> part = findParts("MGRRGQEAATQQGVRCTFLKRSSLGHVLKVTSSLWPSQNLSLRTVLASAAPRYIRGWARSSLHSPLKQMPRSPFLHPHLMTADSSVLLSKVLGDLGSQGAPRCLPLGEHPITPGGGRGCLPEWARPLSPGLTAAP*MWALGPVGPHPVWVTLPLTWVQPGWRWVGVRPRAGGQAGTVSP*ME*");
   for(int i=0;i<part.size();i++){
   cout<<part[i]<<endl;
   }
}