Pyteomics#
Install pyteomics#
[1]:
!pip install pyteomics lxml --quiet
[2]:
import pandas as pd
from pyteomics import mzml
Download Data#
[3]:
import requests

# Example mzML file from the pyteomics test suite, and the local name to save it under.
url = 'https://raw.githubusercontent.com/levitsky/pyteomics/master/tests/test.mzML'
file_name = 'test.mzML'

# Send a GET request to the URL; the timeout (seconds) keeps the cell from
# hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx so an HTTP error page is never saved as test.mzML.
response.raise_for_status()

# Save the content of the response to a file
with open(file_name, 'wb') as file:
    file.write(response.content)
print(f'File {file_name} downloaded successfully!')
File test.mzML downloaded successfully!
Load .mzML file and convert to pd.DataFrame#
[4]:
# Path of the mzML file to read.
input_file = "./test.mzML"

# Parallel per-spectrum accumulators: entry i of each list belongs to spectrum i.
ms_level = []
rt = []
mz = []
intens = []

# The reader is opened with decode_binary=False, so each binary array is
# decoded explicitly with .decode() as it is collected.
with mzml.MzML(input_file, decode_binary=False) as reader:
    for scan in reader:
        ms_level.append(scan['ms level'])
        # Retention time is read from the first entry of the spectrum's scan list.
        rt.append(scan['scanList']['scan'][0]['scan start time'])
        mz.append(scan['m/z array'].decode())
        intens.append(scan['intensity array'].decode())

# Build one row per spectrum, then explode the paired 'mz' / 'int' array
# columns together so the frame becomes long (one row per array element).
df = pd.DataFrame(
    dict(ms_level=ms_level, rt=rt, mz=mz, int=intens)
).explode(['mz', 'int'])
df
[4]:
| | ms_level | rt | mz | int |
|---|---|---|---|---|
| 0 | 1 | 0.004935 | 200.000188 | 0.0 | 
| 0 | 1 | 0.004935 | 200.00043 | 0.0 | 
| 0 | 1 | 0.004935 | 200.000673 | 0.0 | 
| 0 | 1 | 0.004935 | 200.000915 | 0.0 | 
| 0 | 1 | 0.004935 | 202.605829 | 0.0 | 
| ... | ... | ... | ... | ... | 
| 1 | 1 | 0.005935 | 1999.913086 | 0.0 | 
| 1 | 1 | 0.005935 | 1999.937256 | 0.0 | 
| 1 | 1 | 0.005935 | 1999.961548 | 0.0 | 
| 1 | 1 | 0.005935 | 1999.985718 | 0.0 | 
| 1 | 1 | 0.005935 | 2000.009888 | 0.0 | 
39828 rows × 4 columns