Python常用代码片段

下载文件

import requests
# Download a remote image and save it to a local file.
r = requests.get('http://i.pegpic.com/pic/028326/koqn0d5tgankoqn0d5tgan.jpg')
# Fail loudly on an HTTP error instead of saving an error page as the image.
r.raise_for_status()
filename = 'D://a.jpg'
# Use the public ``content`` attribute (the raw response bytes) rather than
# the private ``_content``; the ``with`` block closes the file even when the
# write fails.
with open(filename, 'wb') as target:
    target.write(r.content)

文件查找：

import glob

# List every .jpg file found one directory level below E:/Picture.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(glob.glob(r"E:/Picture/*/*.jpg"))

# List every .py file in the parent directory (relative path).
print(glob.glob(r'../*.py'))

目录文件处理

#coding: utf-8
import os;
import re;

# Main function
def main(fpath):
    """Walk ``fpath`` recursively and run ``process_a_file`` on every file.

    Each file's full path is printed before it is processed.

    Args:
        fpath: Root directory handed to ``os.walk``.
    """
    for root, dirs, files in os.walk(fpath):
        for filename in files:
            # os.path.join uses the platform separator; the previous
            # '%s\%s' format hard-coded a backslash through an invalid
            # "\%" escape sequence.
            full_file = os.path.join(root, filename)
            print(full_file)
            process_a_file(full_file)

# Process a single file
def process_a_file(afile):
    """Append the ``"toBI"`` sections of ``afile`` to ``out.txt``.

    A ``NEW DOC <afile>`` header line is appended to ``out.txt`` (relative to
    the current working directory) first. The input is then scanned line by
    line with trailing whitespace stripped:

    * a line that starts with a double quote is a marker line; copying is
      switched on when the marker starts with ``"toBI"`` and off otherwise;
    * any other line is appended to ``out.txt`` while copying is on.

    Marker lines themselves are never copied.

    Args:
        afile: Path of the text file to read.
    """
    print(afile)
    # Both handles are closed even if reading or writing raises.
    with open(afile, 'r') as ofile, open("out.txt", 'a') as wfile:
        wfile.write("NEW DOC %s \n" % afile)

        do_write = False
        for eachline in ofile:
            aline = eachline.rstrip()
            # NOTE: '"*"' is a regex (zero or more quotes, then a quote), not
            # a glob, so it matches every line that begins with '"'.
            if re.match('"*"', aline) is not None:
                do_write = re.match('"toBI"', aline) is not None
            elif do_write:
                wfile.write(aline)
                wfile.write("\n")

# Run the directory scan. The raw string keeps the backslash literal; the
# previous 'D:\Temp' only worked because "\T" is an unrecognized escape.
main(r'D:\Temp')

文件统计：

#coding: utf-8
import os;
import re;
import sys;
# Module-level frequency tables. Each one starts with a single sample entry
# that illustrates the key/value shape.
dic_word = {'test': 1}           # word -> occurrences
dic_phone = {'HH': 1}            # phone -> occurrences
dic_b_phone = {'HH HA': 1}       # diphone (two consecutive phones) -> occurrences
dic_t_phone = {'HH HA HA': 1}    # triphone (three consecutive phones) -> occurrences
# Sentence length in words -> number of sentences,
# e.g. 10 sentences that are 8 words long.
dic_sen_len = {8: 10}
# Prosodic-phrase length in words -> number of phrases,
# e.g. 5 phrases that are 2 words long.
dic_rhythm_word_len = {2: 5}
# Prosodic-phrase length in phones -> number of phrases,
# e.g. 5 phrases that are 2 phones long.
dic_rhythm_phone_len = {2: 5}

#主函数
def main(fpath):
    """Collect statistics from the file at ``fpath`` and write the reports.

    All module-level tables are emptied first. The file is then read line by
    line: odd-numbered lines (1-based) go to ``process_word_line`` and
    even-numbered lines go to ``process_phone_line``. Finally
    ``analysis_out`` writes the result files.

    Args:
        fpath: Path of the input text file.
    """
    # Drop the sample entries (and anything left from an earlier run).
    for table in (dic_word, dic_phone, dic_b_phone, dic_t_phone,
                  dic_sen_len, dic_rhythm_word_len, dic_rhythm_phone_len):
        table.clear()

    # The with-block closes the input file; it was previously never closed.
    with open(fpath, 'r') as ofile:
        for order, aline in enumerate(ofile, 1):
            if order % 2 == 1:
                # Sentence line: the first 7 characters are skipped
                # (presumably a fixed-width ID prefix - TODO confirm
                # against the data format).
                process_word_line(aline[7:])
            else:
                # Phone (transcription) line.
                process_phone_line(aline)

    # Write the results.
    analysis_out()

def process_word_line(aline):
    """Update the word-level statistics from one sentence line.

    The line is lower-cased, its prosodic phrases are counted by
    ``rhythm_word_calc``, and the cleaned text is split on single spaces.
    The token count is recorded in ``dic_sen_len`` and each token in
    ``dic_word``.

    Args:
        aline: Text of a sentence line.
    """
    lowered = aline.lower()
    # Phrase counting runs on the line before clean_aline() is applied to it.
    rhythm_word_calc(lowered)
    tokens = [tok for tok in clean_aline(lowered).split(' ') if tok]
    # Sentence length in words.
    dic_add(dic_sen_len, len(tokens))
    # Word frequencies.
    for tok in tokens:
        dic_add(dic_word, tok)

def process_phone_line(aline):
    """Update the phone-level statistics from one transcription line.

    Prosodic phrases are counted by ``rhythm_phone_calc`` on the raw line.
    The cleaned line is then split on single spaces, and every phone,
    adjacent pair and adjacent triple is counted in ``dic_phone``,
    ``dic_b_phone`` and ``dic_t_phone``. Pair and triple keys are the phones
    joined with ``-``.

    Args:
        aline: Text of a phone line.
    """
    rhythm_phone_calc(aline)
    phones = [tok for tok in clean_aline(aline).split(' ') if tok]

    # Single phones.
    for phone in phones:
        dic_add(dic_phone, phone)

    # Diphones: each phone paired with its successor.
    for left, right in zip(phones, phones[1:]):
        dic_add(dic_b_phone, "%s-%s" % (left, right))

    # Triphones: each run of three consecutive phones.
    for first, second, third in zip(phones, phones[1:], phones[2:]):
        dic_add(dic_t_phone, "%s-%s-%s" % (first, second, third))

def rhythm_word_calc(aline):
    """Count prosodic-phrase lengths, measured in words.

    The line is normalised with ``clean_aline_word_rhythm`` and split on
    single spaces. A token containing ``/`` or ``%`` closes a phrase; the
    number of tokens since the previous closing token (inclusive of the
    current one) is recorded in ``dic_rhythm_word_len``. Tokens after the
    last closing token are not counted.

    Args:
        aline: Text of a sentence line.
    """
    words = [tok for tok in clean_aline_word_rhythm(aline).split(' ') if tok]
    last_boundary = -1
    for idx, word in enumerate(words):
        if '/' in word or '%' in word:
            dic_add(dic_rhythm_word_len, idx - last_boundary)
            last_boundary = idx

def rhythm_phone_calc(aline):
    """Count prosodic-phrase lengths, measured in phones.

    The line is normalised with ``clean_aline_phone_rhythm`` and split on
    single spaces. A token containing ``/`` or ``.`` closes a phrase; the
    number of tokens since the previous closing token (inclusive of the
    current one) is recorded in ``dic_rhythm_phone_len``.

    Args:
        aline: Text of a phone line.
    """
    phones = [tok for tok in clean_aline_phone_rhythm(aline).split(' ') if tok]
    last_boundary = -1
    for idx, phone in enumerate(phones):
        if '/' in phone or '.' in phone:
            dic_add(dic_rhythm_phone_len, idx - last_boundary)
            last_boundary = idx

# Output results
def analysis_out():
    """Write every statistics table to its own log file.

    Files are written to the current working directory, in the order listed
    below, through ``output_a_dic``.
    """
    reports = (
        (dic_word, 'word.log'),
        (dic_phone, 'phone.log'),
        (dic_sen_len, 'sen_len.log'),
        (dic_b_phone, 'dic_bi_phone.log'),
        (dic_t_phone, 'dic_tri_phone.log'),
        (dic_rhythm_word_len, 'dic_rhythm_word_len.log'),
        (dic_rhythm_phone_len, 'dic_rhythm_phone_len.log'),
    )
    for table, log_name in reports:
        output_a_dic(table, log_name)

def output_a_dic(a_dic, filename):
    """Write ``a_dic`` to ``filename``, one ``key count`` pair per line.

    Entries are ordered by count, highest first. Each line has the form
    ``'<key> <count> \\n'``. An existing file is overwritten.

    Args:
        a_dic: Mapping of key to integer count.
        filename: Path of the file to write.
    """
    # items() works on both Python 2 and 3; iteritems() is Python-2-only.
    ranked = sorted(a_dic.items(), key=lambda pair: pair[1], reverse=True)
    # The with-block closes the file even if a write fails.
    with open(filename, 'w') as wfile:
        for key, count in ranked:
            wfile.write('%s %d \n' % (key, count))

# Helper functions
def clean_aline(aline):
    """Strip punctuation and prosody markers from a line before tokenising.

    In order: ``/`` followed by whitespace becomes a single space; ``.``,
    ``,``, ``!`` and ``"`` followed by whitespace are removed together with
    that whitespace character; every ``%`` becomes a space; leading and
    trailing whitespace is stripped.

    Args:
        aline: Line of text to clean.

    Returns:
        The cleaned line.
    """
    # Applied strictly in this order: an earlier substitution can expose a
    # match for a later one.
    substitutions = (
        (r"\/\s", " "),
        (r"\.\s", ""),
        (r"\,\s", ""),
        (r"\!\s", ""),
        (r"\"\s", ""),
    )
    for pattern, replacement in substitutions:
        aline = re.sub(pattern, replacement, aline)
    return aline.replace("%", " ").strip()
def clean_aline_word_rhythm(aline):
    """Normalise a sentence line while keeping the ``/`` and ``%`` markers.

    In order: ``.`` followed by whitespace is removed; ``,`` followed by
    whitespace becomes a single space; ``!`` and ``"`` followed by
    whitespace are removed; every remaining ``"`` is removed; leading and
    trailing whitespace is stripped.

    Args:
        aline: Sentence line to normalise.

    Returns:
        The normalised line.
    """
    # Order matters: each substitution runs on the result of the previous one.
    substitutions = (
        (r"\.\s", ""),
        (r"\,\s", " "),
        (r"\!\s", ""),
        (r"\"\s", ""),
    )
    for pattern, replacement in substitutions:
        aline = re.sub(pattern, replacement, aline)
    return aline.replace('"', '').strip()
def clean_aline_phone_rhythm(aline):
    """Normalise a phone line while keeping the ``/`` and ``.`` markers.

    In order: ``,`` followed by whitespace becomes a single space; ``!`` and
    ``"`` followed by whitespace are removed; a ``/`` or ``.`` preceded by a
    space is attached to the preceding token; leading and trailing
    whitespace is stripped; a ``.`` is appended to the result.

    Args:
        aline: Phone line to normalise.

    Returns:
        The normalised line, always ending in ``.``.
    """
    # Order matters: each substitution runs on the result of the previous one.
    substitutions = (
        (r"\,\s", " "),
        (r"\!\s", ""),
        (r"\"\s", ""),
    )
    for pattern, replacement in substitutions:
        aline = re.sub(pattern, replacement, aline)
    # Glue detached markers onto the token before them.
    aline = aline.replace(" /", "/").replace(" .", ".")
    return aline.strip() + '.'
def dic_add(adic, akey):
    """Increment the count stored for ``akey`` in ``adic``.

    A key that is not present yet is inserted with a count of 1.

    Args:
        adic: Mapping of key to integer count; modified in place.
        akey: Key whose count is incremented.
    """
    # dict.get replaces dict.has_key(), which was removed in Python 3.
    adic[akey] = adic.get(akey, 0) + 1

# Command-line entry point:
#   one argument  -> echo it, then analyse that file
#   no arguments  -> analyse the built-in default file
#   anything else -> report a usage error
if len(sys.argv) == 2:
    print(sys.argv[1])
    main(str(sys.argv[1]))
elif len(sys.argv) == 1:
    main('C:\\Users\\huangzhiqiang\\PycharmProjects\\untitled\\ef4_6k.txt')
else:
    print("parameters error\n")

Python常用代码片段

浏览过的版块