I copied the example and change function parse_contents() like this:
def parse_contents(contents, filename, date):
content_type, content_string, = contents.split(',')
print(f'name:{type(filename)}\n{filename}\n')
print(f'type:{type(content_type)}\n{content_type}\n')
print(f'string:{type(content_string)}\n{content_string}\n')
decoded = base64.b64decode(content_string)
print(f'decoded:{decoded}\n')
#seq1 = SeqIO.parse(decoded, "fasta")
#print(f'seq parsed {seq1}')
seq_str = str(decoded)
print(f'seq_str: {seq_str}')
#split into lines for output as P's
seq_arr = seq_str.split('\n')
#replace \n
seq_arr = [x.replace("\n", " ") for x in seq_arr]
print('seq_arr')
for line in seq_arr:
print(line)
'''
try:
seq = SeqIO.read(decoded, "fasta")
print(f'\n\nseq:{seq}')
except Exception as e:
print(e)
return html.Div([
'Error processing file upload'
])
'''
return html.Div([
html.H5(filename),
html.H6(datetime.datetime.fromtimestamp(date)),
html.Div([
html.P(line)
for line in seq_arr
]),
html.Hr(),
#debugging content
html.Div('Raw Content'),
html.Pre(contents[0:100] + '...', style={
'whiteSpace': 'pre-wrap',
'wordBreak': 'break-all'
})
])
But it won’t split on newline nor replace them in the above snippet.
fasta is a very common text format for sequences.
This is the output on stdout
string:<class 'str'>
PldQXzAxMTE0MzMxNC4xIEwtbGFjdGF0ZSBkZWh5ZHJvZ2VuYXNlIFtHbG9lb2JhY3RlciB2aW9sYWNldXNdCk1RRFJMRlZTTUVIUFJBTFBFVERMSUtHQUlWR0FHQVZHTUFJQVlTTUxJUU5URkRFTFZMVkRJRFJSS1ZFR0VWTURMVkhHSVAKRlZFUFNWVlJBR1RMQURDUkdWRFZWVklUQUdBUlFSRUdFVFJMU0xWUVJOVkVJRlJHTElHRUlNRUhDUE5BSUxMVlZTTlBWRApWTVRZVkFNS0xBR0xQUFNSVklHU0dUVkxEVEFSRlJZTExBRVJMUlZEUFJTTEhBWUlJR0VIR0RTRVZQVldTUkFOVkFHQUZMClNFSUVQQVZHVFBERFBBS01GRVZGRUhWS05BQVlFSUlFUktHQVRTV0FJR0xBVFRRSVZSQUlUUk5RTlJWTFBWU1ZMTVNHTEgKR0lFRVZDTEFZUEFWTE5SUUdJRFJMVktGU0xTUEdFRUVRTFFSU0FSVk1SUVRMREdJUUYKCg==
decoded:b'>WP_011143314.1 L-lactate dehydrogenase [Gloeobacter violaceus]\nMQDRLFVSMEHPRALPETDLIKGAIVGAGAVGMAIAYSMLIQNTFDELVLVDIDRRKVEGEVMDLVHGIP\nFVEPSVVRAGTLADCRGVDVVVITAGARQREGETRLSLVQRNVEIFRGLIGEIMEHCPNAILLVVSNPVD\nVMTYVAMKLAGLPPSRVIGSGTVLDTARFRYLLAERLRVDPRSLHAYIIGEHGDSEVPVWSRANVAGAFL\nSEIEPAVGTPDDPAKMFEVFEHVKNAAYEIIERKGATSWAIGLATTQIVRAITRNQNRVLPVSVLMSGLH\nGIEEVCLAYPAVLNRQGIDRLVKFSLSPGEEEQLQRSARVMRQTLDGIQF\n\n'
seq_str: b'>WP_011143314.1 L-lactate dehydrogenase [Gloeobacter violaceus]\nMQDRLFVSMEHPRALPETDLIKGAIVGAGAVGMAIAYSMLIQNTFDELVLVDIDRRKVEGEVMDLVHGIP\nFVEPSVVRAGTLADCRGVDVVVITAGARQREGETRLSLVQRNVEIFRGLIGEIMEHCPNAILLVVSNPVD\nVMTYVAMKLAGLPPSRVIGSGTVLDTARFRYLLAERLRVDPRSLHAYIIGEHGDSEVPVWSRANVAGAFL\nSEIEPAVGTPDDPAKMFEVFEHVKNAAYEIIERKGATSWAIGLATTQIVRAITRNQNRVLPVSVLMSGLH\nGIEEVCLAYPAVLNRQGIDRLVKFSLSPGEEEQLQRSARVMRQTLDGIQF\n\n'
seq_arr
b'>WP_011143314.1 L-lactate dehydrogenase [Gloeobacter violaceus]\nMQDRLFVSMEHPRALPETDLIKGAIVGAGAVGMAIAYSMLIQNTFDELVLVDIDRRKVEGEVMDLVHGIP\nFVEPSVVRAGTLADCRGVDVVVITAGARQREGETRLSLVQRNVEIFRGLIGEIMEHCPNAILLVVSNPVD\nVMTYVAMKLAGLPPSRVIGSGTVLDTARFRYLLAERLRVDPRSLHAYIIGEHGDSEVPVWSRANVAGAFL\nSEIEPAVGTPDDPAKMFEVFEHVKNAAYEIIERKGATSWAIGLATTQIVRAITRNQNRVLPVSVLMSGLH\nGIEEVCLAYPAVLNRQGIDRLVKFSLSPGEEEQLQRSARVMRQTLDGIQF\n\n'
Newlines are there in the output div as well. How can I go from base64 to a Python string and then run strip and replace from a fasta file upload to dcc.Upload()?
Here is a raw sample that can be copy and pasted into a file called s.fasta to try:
>WP_011143314.1 L-lactate dehydrogenase [Gloeobacter violaceus]
MQDRLFVSMEHPRALPETDLIKGAIVGAGAVGMAIAYSMLIQNTFDELVLVDIDRRKVEGEVMDLVHGIP
FVEPSVVRAGTLADCRGVDVVVITAGARQREGETRLSLVQRNVEIFRGLIGEIMEHCPNAILLVVSNPVD
VMTYVAMKLAGLPPSRVIGSGTVLDTARFRYLLAERLRVDPRSLHAYIIGEHGDSEVPVWSRANVAGAFL
SEIEPAVGTPDDPAKMFEVFEHVKNAAYEIIERKGATSWAIGLATTQIVRAITRNQNRVLPVSVLMSGLH
GIEEVCLAYPAVLNRQGIDRLVKFSLSPGEEEQLQRSARVMRQTLDGIQF