# Compute the quartiles of a list of values in Python.

import os

import gzip

import pandas as pd

import numpy as np

def cal_length(f):
    """Measure the sequence length of every record in a gzip-compressed FASTQ file.

    The file is read as consecutive 4-line records, and the second line of
    each record is taken as the sequence. Line endings are stripped before
    measuring so they do not inflate the length. Trailing lines that do not
    form a complete 4-line record are ignored.

    Args:
        f: Path to a gzip-compressed text file laid out in 4-line records.

    Returns:
        A tuple ``(tlen, plen, seq)``: the total number of lines read, the
        number of complete 4-line records, and the list of sequence lengths
        (one per complete record). The list is placed at index 2 because the
        calling code in this file reads ``cal_length(...)[2]``.
    """
    seq = []
    tlen = 0
    # Stream the file line by line instead of loading it all into memory.
    with gzip.open(f, 'rt') as handle:
        for tlen, line in enumerate(handle, start=1):
            # Lines 2, 6, 10, ... (1-based) are the second line of a record.
            if tlen % 4 == 2:
                seq.append(len(line.rstrip('\r\n')))
    plen = tlen // 4
    # Drop the length taken from a trailing, incomplete record (if any).
    del seq[plen:]
    return tlen, plen, seq

# ord is the sort order of the data: 'asc' (low to high) or 'desc' (high to low)

def quantile_exc(data, n, ord='asc', interpolation='lower'):
    """Return the value at fraction ``n`` of the sorted data (no averaging).

    The data is sorted, the 1-based rank ``len(data) * n`` is rounded down or
    up according to ``interpolation``, and the element at that rank is
    returned. The rank is clamped to ``[1, len(data)]`` so that small samples
    (where the rounded rank would be 0) return the first element instead of
    failing.

    Args:
        data: Array-like of sortable values, e.g. a pandas column or a list.
        n: Fraction between 0 and 1 (0.25 for the first quartile).
        ord: 'desc' ranks the data from high to low; any other value keeps
            the ascending order.
        interpolation: 'lower' rounds the rank down, 'higher' rounds it up.

    Returns:
        The element of ``data`` found at the computed rank.

    Raises:
        ValueError: If ``data`` is empty, ``n`` is outside [0, 1], or
            ``interpolation`` is neither 'lower' nor 'higher'.
    """
    import math

    ordered = list(np.sort(data))
    if not ordered:
        raise ValueError('data must contain at least one value')
    if not 0 <= n <= 1:
        raise ValueError('n must be a fraction between 0 and 1')
    if ord == 'desc':
        ordered.reverse()

    position = len(ordered) * n
    if interpolation == 'lower':
        rank = math.floor(position)
    elif interpolation == 'higher':
        rank = math.ceil(position)
    else:
        raise ValueError("interpolation must be 'lower' or 'higher'")

    # Ranks are 1-based; clamp so rank 0 (tiny samples, n == 0) stays valid.
    rank = min(max(rank, 1), len(ordered))
    return ordered[rank - 1]

# Write one row of read-length quartiles per gzip-compressed FASTQ file.
# NOTE(review): fqGzPath (the input directory) is not defined in the visible
# part of this file; it must be set before this point -- confirm.
# Match on the suffix, not a substring, so files such as 'x.fastq.gz.md5'
# are not picked up and opened as gzip.
gz_names = [name for name in os.listdir(fqGzPath) if name.endswith('.fastq.gz')]

with open('fqGzLenSta.txt', 'w') as fw:
    # Header row of the tab-separated summary table.
    fw.write('{0}\t{1}\t{2}\t{3}\n'.format('sample', 'lower', 'median', 'upper'))

    for gz_name in gz_names:
        # Sample name is the file name without the '.fastq.gz' suffix.
        fileName = gz_name[:-len('.fastq.gz')]
        fqGzFile = os.path.join(fqGzPath, gz_name)

        # Index 2 of cal_length's result is read as the per-record lengths.
        df = pd.Series(cal_length(fqGzFile)[2])

        q1 = quantile_exc(df, 0.25, ord='asc', interpolation='lower')
        q2 = quantile_exc(df, 0.5, ord='asc', interpolation='lower')
        q3 = quantile_exc(df, 0.75, ord='asc', interpolation='lower')

        fw.write('{0}\t{1}\t{2}\t{3}\n'.format(fileName, q1, q2, q3))

# Source: https://m.sciencenet.cn/blog-994715-1316960.html

# (web page footer) 全部精选博文导读 -- "Guide to all featured blog posts"

# (web page footer) GMT+8, 2022-11-27 10:25