# Pyteomics

Launch on Binder

## Install pyteomics

[1]:
!pip install pyteomics lxml --quiet
[2]:
import pandas as pd
from pyteomics import mzml

## Download Data

[3]:
import requests

url = 'https://raw.githubusercontent.com/levitsky/pyteomics/master/tests/test.mzML'
file_name = 'test.mzML'

# Send a GET request to the URL. The timeout keeps the cell from hanging
# indefinitely if the server cannot be reached.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error (e.g. 404) instead of saving the error page as
# the data file and reporting success.
response.raise_for_status()

# Save the content of the response to a file
with open(file_name, 'wb') as file:
    file.write(response.content)

print(f'File {file_name} downloaded successfully!')
File test.mzML downloaded successfully!

## Load .mzML file and convert to pd.DataFrame

[4]:
input_file = "./test.mzML"

# Each list receives one entry per spectrum read from the file.
ms_level, rt, mz, intens = [], [], [], []
with mzml.MzML(input_file, decode_binary=False) as reader:
    for scan in reader:
        ms_level.append(scan['ms level'])
        # The start time is read from the first entry of the spectrum's scan list.
        rt.append(scan['scanList']['scan'][0]['scan start time'])
        # The reader was opened with decode_binary=False, so the peak arrays
        # are decoded explicitly here.
        mz_values = scan['m/z array'].decode()
        intensity_values = scan['intensity array'].decode()
        mz.append(mz_values)
        intens.append(intensity_values)

# Build a frame with one row per spectrum, then explode the 'mz' and 'int'
# columns together so the result is long: one row per (m/z, intensity) pair.
df = pd.DataFrame(
    {'ms_level': ms_level, 'rt': rt, 'mz': mz, 'int': intens}
).explode(['mz', 'int'])
df
[4]:
ms_level rt mz int
0 1 0.004935 200.000188 0.0
0 1 0.004935 200.00043 0.0
0 1 0.004935 200.000673 0.0
0 1 0.004935 200.000915 0.0
0 1 0.004935 202.605829 0.0
... ... ... ... ...
1 1 0.005935 1999.913086 0.0
1 1 0.005935 1999.937256 0.0
1 1 0.005935 1999.961548 0.0
1 1 0.005935 1999.985718 0.0
1 1 0.005935 2000.009888 0.0

39828 rows × 4 columns