Skip to main content

A small example package

Project description

RWexptest

This is a simple example package.

Pseqpa

这个工具包用于对蛋白质序列进行简单处理,其中包含的主要函数有:

excel_csv_to_fasta

import os
import pandas
from RWexptest import Pseqpa

# 指定输入和输出文件夹路径
input_folder = "<需要转换的目标文件的路径>"
output_folder = "<保存路径>"
entry_column_name = "Entry"  # 请替换为您的entry列的名称
sequence_column_name = "Sequence"  # 请替换为您的sequence列的名称

# 调用函数并传递输入和输出文件夹路径
Pseqpa.excel_csv_to_fasta(input_folder, output_folder, entry_column_name, sequence_column_name)

process_fasta_files

import os
from RWexptest import Pseqpa

# 指定输入目录、输出目录、批次大小以及最小和最大蛋白质序列长度
input_directory = "<fasta格式文件路径>"
output_directory = "<处理后的保存路径>"
batch_size = 500  # 将fasta格式的蛋白质序列按每500条划分为一个批次
min_sequence_length = 10  # 只保留长度不少于10个氨基酸的蛋白质序列
max_sequence_length = 6000  # 只保留长度不超过6000个氨基酸的蛋白质序列

# 处理fasta文件并将其分成批次,只保留符合长度条件的序列
Pseqpa.process_fasta_files(input_directory, output_directory, batch_size, min_sequence_length, max_sequence_length)

create_blast_database(需要终端环境中已配置好NCBI BLAST工具)

from Bio.Blast.Applications import NcbimakeblastdbCommandline
from RWexptest import Pseqpa

# 指定构建数据库对象、数据库位置和数据库类型
input_fasta_path = "<your_train_data_path/train_data.fasta>"  # 路径中不能包含空格
output_db_path = "<Blast_database_path/Train_protein_seq_database>"  # 路径中不能包含空格
dbtype = "prot"  # 蛋白质数据库

# 构建数据库
result_message = Pseqpa.create_blast_database(input_fasta_path, output_db_path, dbtype)

run_blastp(需要终端环境中已配置好NCBI BLAST工具)

from Bio.Blast.Applications import NcbiblastpCommandline
from RWexptest import Pseqpa

# 指定BLAST查询对象、数据库、结果文件路径和结果格式
query_fasta_path = "<your_test_data_path/test_data.fasta>"
blast_db_path = "<Blast_database_path/Train_protein_seq_database>"
output_file_path = "<your_save_path/test_data_blast_results.xml>"
custom_outfmt = 5  # 自定义输出格式(5 表示 XML 格式)

#进行同源性blast
result_message = Pseqpa.run_blastp(query_fasta_path, blast_db_path, output_file_path, custom_outfmt)

parse_blast_xml_to_excel

import pandas as pd
from Bio import SearchIO

# 调用函数并传递输入XML文件和输出Excel文件的路径
input_xml = '<经过NCBI Blast处理后获得的xml文件路径/result.xml>'
output_excel = '<保存路径/result.xlsx>'

from RWexptest import Pseqpa

Pseqpa.parse_blast_xml_to_excel(input_xml, output_excel)

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

RWexptest-0.0.7.tar.gz (10.5 kB view hashes)

Uploaded Source

Built Distribution

RWexptest-0.0.7-py3-none-any.whl (12.7 kB view hashes)

Uploaded Python 3

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page