Pyteomics#
Install pyteomics#
[1]:
!pip install pyteomics lxml --quiet
[2]:
import pandas as pd
from pyteomics import mzml
Download Data#
[3]:
import requests

url = 'https://raw.githubusercontent.com/levitsky/pyteomics/master/tests/test.mzML'
file_name = 'test.mzML'

# Send a GET request to the URL. The timeout keeps a stalled connection
# from hanging the notebook indefinitely.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error (404, 500, ...) instead of saving the error
# page to disk and reporting success.
response.raise_for_status()

# Save the content of the response to a file
with open(file_name, 'wb') as file:
    file.write(response.content)

print(f'File {file_name} downloaded successfully!')
File test.mzML downloaded successfully!
Load .mzML file and convert to pd.DataFrame#
[4]:
input_file = "./test.mzML"

# Per-spectrum accumulators; each gets one entry appended per scan read.
ms_level = []
rt = []
mz = []
intens = []

# The reader is opened with decode_binary=False, and each binary array is
# decoded explicitly via .decode() as it is collected.
with mzml.MzML(input_file, decode_binary=False) as reader:
    for scan in reader:
        ms_level.append(scan['ms level'])
        # Retention time is read from the first entry of the scan list.
        rt.append(scan['scanList']['scan'][0]['scan start time'])
        mz.append(scan['m/z array'].decode())
        intens.append(scan['intensity array'].decode())

# One row per spectrum at this point: 'mz' and 'int' each hold a whole array.
columns = {
    'ms_level': ms_level,
    'rt': rt,
    'mz': mz,
    'int': intens,
}
# Explode the 'mz' and 'int' columns together to make the DataFrame long:
# every (m/z, intensity) pair becomes its own row.
df = pd.DataFrame(columns).explode(['mz', 'int'])
df
[4]:
| | ms_level | rt | mz | int |
---|---|---|---|---|
0 | 1 | 0.004935 | 200.000188 | 0.0 |
0 | 1 | 0.004935 | 200.00043 | 0.0 |
0 | 1 | 0.004935 | 200.000673 | 0.0 |
0 | 1 | 0.004935 | 200.000915 | 0.0 |
0 | 1 | 0.004935 | 202.605829 | 0.0 |
... | ... | ... | ... | ... |
1 | 1 | 0.005935 | 1999.913086 | 0.0 |
1 | 1 | 0.005935 | 1999.937256 | 0.0 |
1 | 1 | 0.005935 | 1999.961548 | 0.0 |
1 | 1 | 0.005935 | 1999.985718 | 0.0 |
1 | 1 | 0.005935 | 2000.009888 | 0.0 |
39828 rows × 4 columns