2018 October 15 python

python3-基本数据类型

变量命名规范（标识符）

严格区分大小写

类名：每个单词第一个字母大写
方法名：驼峰式，例如check_login或者checkLogin
变量名：驼峰式，例如check_login或者checkLogin

不能使用的关键字列表

>>> import keyword
>>> keyword.kwlist
['False', 'None', 'True', 'and', 'as', 'assert', 'break', 'class', 'continue', '
def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global', 'if',
 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'retu
rn', 'try', 'while', 'with', 'yield']

【一】Integral类型

包括整数和布尔型

############    1.1：数学运算     ###################
1 + 2  # >3
1 - 2  # -1
2 * -3  # >-6
2 / -3  # > -0.6666666666666666
5 // 3  # > 1  舍弃小数部分取整
-5 % 3  # > 1 商是-2,余数是1
5 % 3  # > 求余数得2
5**-2  # > 0.04 ，求5的-2次方幂
abs(-1.9)  # > 1.9取绝对值
# divmod返回元组((x-x%y)/y,x%y)，（商、余数）
divmod(-5, 3)  # > (-2,1)
pow(5, -2)  # > 0.04等价于5**-2
# pow(x,y,z)类似(x**y)%z,当有第3个参数时，第2个不能取负数
pow(5, 2, 2)  # > 1
round(-1.6)  # > 结果四舍五入 -2
bin(-10)  # > 结果是字符串类型'-0b1010'
type(hex(-10))  # 结果是字符串 <class 'str'>
int(-1.9)  # >截取整数部分 -1
int('12', 11)  # > 13 表示进制是11的整数12转成10进制后的结果
import math
math.floor(1.6)  # > 1 取底
math.ceil(1.1)  # > 2 取顶

z = -1.5 + 2.1j
z.real, z.imag  # （-1.5,2.1）复数

############    1.2：逻辑运算     ###################
5 and 6  # > 6
5 or 6  # > 5
not 6  # > False
0 or 1  # > 1
1 == True  # > True
0 == False  # >True
2 == True  # > False 注意2不等价于True
2 == False  # > False，注意非1正整数做if判断时当做True

【二】字符串

表示较长字符串的两种方法

longstr = u'这一段没结束' + \
    u'接着来一段'
anotherstr = ("xxx"
              "bbbb")

chr(64)  # > @ 返回整数对应的Unicode字符
ord('A')  # > 65返回Unicode字符对应的整数

规避字符串中的转义使用r引导字符串

>>>r"句子中有\t"

字符串分片与步距

s = "he ate camel food"
s[::-2]  # >'do ea t h',注意反方向跳

字符串内置函数

'abc'.capitalize()  # > 首字母变大写'Abc'
# 固定序列可使用in，+=，*=等操作
str1='abcdefg'
str1.center(3)  # > 'abcdefg'
str1.center(10, '*')  # > '*abcdefg**'
str1.count('d')  # >  1
str1.encode('utf8')  # >得到二进制 b'abcdefg'
str1.endswith('g')  # > True
str1.startswith('a')  # >True 判断是否以指定字符开头
# str1.expandtabs(size) 返回str1的一个副本，将制表符换成8个或者size个空格
str1.find('b', 3, 6)  # > -1 返回从第3到第6个字符之间出现b的最左边的索引值，如果没有则返回-1
# str1.index('b',3,6) #> 出现ValueError错误，功能类似find，但是找不到时报错
str1.rindex('b')  # > 1  从右向左找，返回索引值
str1.isalnum()  # > True 如果strr1非空，并且每个字符都是字母数字，则返回True
str1.isalpha()  # > True 判断字符串是否全都是字母
str1.isdigit()  # >False  判断是否全都是数字
str1.isdecimal()  # > False 判断是否都是Unicode基数为10的数字
str1.isidentifier()  # > True 判断是否每个字符都是有效的字符
str1.islower()  # > True 判断字符串中至少有一个可小写字母，并且所有字母小写
str1.isnumeric()  # > False
str1.isprintable()  # > True，有换行符时返回False
str1.isspace()  # > False 如果字符串非空，每个字符都是空白字符，则返回True，制表符也认为空白符
str1.istitle()  # > False,非空且首字母大写的字符串则返回True
str1.isupper()  # > False 类似islower，判断是否都大写
'*'.join(str1)  # > 'a*b*c*d*e*f*g' 把前面的字符插入到后面字符串的每个字符之间
str1.ljust(10, '*')  # > 'abcdefg***' 补齐到10，左对齐，右侧用'*'填充
str1.lower()  # > 'abcdefg' 把字母变小写
str1.upper()  # > 'ABCDEFG'把能大写的都变大写
str1.partition('b')  # ('a','b','cdefg')返回元组，分隔符左边，分隔符，分隔符右边
str1.partition('s')  # ('abcdefg','','')返回元组
str2 = 'abcdb123'
str2.partition('b')  # >('a','b','cdb123')
str2.rpartition('b')  # > ('abcd','b','123')
# s.replace(t,u,n) 返回s的副本，其中每个t使用u替换，最多替换n个
str2.replace('b', 'B', 3)  # > 'aBcdB123'
str2.split('b')  # >得到列表['a','cd','123']按指定字符分割，最多分n次，即最多n+1个元素
str2.splitlines()  # > 以换行符分割
str2.strip()  # > 'abcdb123' 将两头的空白字符去掉
str2.lstrip()  # > 只去掉开头的空白
str2.rstrip()  # > 只去掉结尾的空白
str2.swapcase()  # > 'ABCDB123' 大小写字母互换
str2.title()  # > 'Abcdb123'
str2.zfill(10)  # > '00abcdb123' 补齐到10，左侧用0填充

len(str2)  # > 8  返回字符串中字符的个数

str3 = reversed(str2)  # 得到的一个迭代对象<reversed object at  0x000>,使用时才能抽取到元素
''.join(reversed(str2))  # > '321bdcba' 通过join运算迭代使用反转后的各个元素
str2[::-1]  # > '321bdcba'效果与上面一样

函数示例:从”what a rose“中抽取tag为red中间的数据#########

def extract_from_tag1(tag, line):
    opener = "<" + tag + ">"
    closer = "</" + tag + ">"
    try:
        i = line.index(opener)
        start = i + len(opener)
        j = line.index(closer, start)
        return line[start, j]
    except ValueError:
        return None


def extract_from_tag2(tag, line):
    opener = "<" + tag + ">"
    closer = "</" + tag + ">"
    i = line.find(opener)
    if i != -1:
        start = i + len(opener)
        j = line.find(closer, start)
        if j != -1:
            return line[start:j]
    return None

str1.count('m', 6) == str1[6:].count('m')  # > True，两者等价

# 提取路径与当前文件
spath = "/usr/local/bin/firefox"
result = spath.rpartition('/')  # >("/usr/local/bin","/","firefox")

format格式规约

'The novel {0} was published in {1}'.format("Hard Times", 1854)
# 将字符串与数字拼接的话会有报错，使用format可解决该问题
"{0}{1}".format("The amount is $", 200)
"{who} turned {age} this year".format(who='she', age=88)
# 同时使用位置参数和关键字参数时，关键字参数一定在位置参数后面
"The {who} was {0} last week!".format(12, who='boy')
# 也可使用索引
stock = ["paper", "envelops", "pens"]
"we have {0[1]} in stock".format(stock)
# 字典
d = dict(animal='elephant', weight=2000)
"the {0[animal]} weights {0[weight]}kg".format(d)
"the {animal} weights {weight}kg".format(**d)

# r,a强制表象形式，但是a仅限于ASCII字符  s强制字符串形式
import decimal
print("{0} {0!s} {0!r} {0!a}".format(decimal.Decimal("93.4")))
# 93.4  93.4 Decimal('93.4') Decimal('93.4')

# 字符串格式规约
s = "The sword of truth"
"{0:->25}".format(s)  # > '-------The sword of truth'右对齐，左侧用-补齐
"{0:.<25}".format(s)  # > 'The sword of truth.......'左对齐，右侧用.补齐
"{0:.10}".format(s)  # >'The sword' 最多保留10个字符
maxwidth = 12
"{0}".format(s[:maxwidth])
"{0:.{1}}".format(s, maxwidth)
# 整数格式规约
"{0:0>12}".format(-123)  # > '00000000-123' 宽度12位包括符号位
"{0:*<+12}".format(-123)  # > '-123********' 第一个是填充字符
"{0:*^+12}".format(-123)  # > '****-123****'  第二个是对齐，< > ^ =其中=表示在符号与值之间插入
"{0:*>+12}".format(-123)  # > '********-123'  第三个是+表示必须输出符号，是-表示只负数时输出符号
"{0:*=+12}".format(-123)  # > '-********123'
"{0:b} {0:o} {0:x} {0:X}".format(164)  # > '10100100 244 a4 A4'
"{0:#b} {0:#o} {0:#x} {0:#X}".format(164)  # > '0b10100100 0o244 0xa4 0XA4'

内置序列类型bytearray、bytes、list、str、tuple，序列类型可用*= ,in ,len() ,分片[] 操作

【三】元组

元组只有两个方法count与index

t1 = tuple("abca")  # > t1即 ('a','b','c','a')
t1.count('a')  # > 2
t1.index('a')  # > 0
hair = "a", "b", "c", "d"
hair[:2], 'gray', hair[2:]  # 得到元组(("a","b"),"gray",("c","d"))，元组的分片得到元组
# hair[:2]+"gray"+hair[2:] 这种写法是错误的，元组与字符串不能用+连接
hair[:2] + ("gray",) + hair[2:]  # >得到元组 ('a','b','gray','c','d')
# 元组只有一个元素时，最后的逗号不能省略
type((1))  # > 返回的是int
type((1,))  # > 返回的是tuple

# 命名元组collections.namedtuple()
import collections
# 第一个参数Sale是想创建的类名，第二个参数是空格隔开的字段名称
Sale = collections.namedtuple(
    "Sale", "productid customerid date quantity price")
sale1 = Sale(432, 921, "2008-09-14", 3, 7.99)
sale1.price  # > 7.99 命名元组可用 .字段名取值

【四】列表

列表使用[]创建

list1 = list("abc321")  # > 得到列表['a','b','c','3','2','1']
list1.append('x')  # > ['a','b','c','3','2','1','x']在最后追加一个元素
list1.count('a')  # > 1 统计元素出现次数
# list1.extend(m)  #>将迭代器m的项追加到list1的最后，类似 +=
list1.index('b', 0, 3)  # > 1 返回b在索引位置0到3之间最左边出现b的位置，如果没有则报错
list1.insert(3, 'd')  # > ['a','b','c','d','3','2','1','x']在索引位3的位置插入'd'

list1.pop()  # >返回并删除最右边的项，list1变成['a','b','c','d','3','2','1']
list1.pop(2)  # > ['a','b','d','3','2','1'] 删除指定索引位置的元素
list1.remove('a')  # >  ['b','d','3','2','1'] 删除最左边出现的指定元素
list1.reverse()  # > list1变成 ['1','2','3','d','b']
first, *rest = list1  # > rest为列表['2','3','d','b']
del rest[1]  # >删除rest中的索引值为1 的项

# 列表迭代器创建列表
leaps = []
for year in range(1900, 1940):
    if(year % 4 == 0 and year % 100 != 0) or (year % 400 == 0):
        leaps.append(year)
# [item运算表达式   for item in 迭代器 if condition]
list2 = [a + b for a in ['a', 'b', 'c'] for b in ['x', 'y', 'z']]
# list2被创建为['ax','ay','az','bx','by','bz','cx','cy','cz']

【五】集合

可变集合set类型，固定集合frozenset类型
集合中的元素不能使用dict、list、set类型，只能使用可hash的类型，内置的有float、frozenset、int、str、tuple
集合中的元素没有索引值的概念，是无序的，没有重复的元素
集合可用于爬虫时做唯一值判断，避免重复爬取

s1 = set()  # 创建空集合，使用s1={}创建的是字典
s1 = set("apple")  # >得到集合{'a','p','l','e'}
s1.add('b')  # > 追加元素
s1.clear()  # > 清空元素
s1.copy()  # > 返回s1的一个浅拷贝，用is判断得False，用==判断得True
s1 = set("apple")
s1.pop()  # > 随机删除s1中一个元素，为空集时报错
s2 = {'a'}
s1.difference(s2)  # > 在s1中，但是不在s2中的，等价s1-s2,s1和s2没被改变
s1.difference_update(s2)  # > s1被改变，删除其中s2中的元素，等价于s1-=s2
s1.discard('a')  # >删除元素，不存在时不报错
s1.add('a')
s1.remove('a')  # >删除元素，不存在的元素时报错
s1.intersection(s2)  # >返回s1与s2的交集，等价于s1&s2
s1.intersection_update(s2)  # >求交集并赋值给s1，等价于s1&=s2
s1.isdisjoint(s2)  # >如果s1与s2没交集，则返回True
s1.issubset(s2)  # > s1如果是s2的子集，则返回True
s1.issuperset(s2)  # > s1如果是s2的超集，则返回True
s1.symmetric_difference(s2)  # >返回一个新集合，并集减去交集的结果，等价s1^s2
s1.symmetric_difference_update(s2)  # 等价于s1^=s2
s1.union(s2)  # > 返回一个新集合，返回s1与s2的并集，等价于s1|s2
s1.update(s2)  # > 等价于s1|=s2
# 处理不重复项示例
seen = set()
for i in range(10):
    if i not in seen:
        seen.add(i)
        # process(i)
ips=["192.168.0.1"]
for ip in set(ips):
    pass
#集合生成器
files=["index.html","t.css"]
html = { x for x in files if x.lower().endswith((".htm",".html"))}

#固定集合frozenset，上面集合中不影响集合自身的方法都适用于固定集合
#固定集合与普通集合做二元运算时，结果以左侧为准
s3 = frozenset("123") #> {'3','2','1'}

【六】字典

** 字典dict包含0个或者多个key-value对，key必须是可hash运算的，字典无序**

d1 = dict({"id":1948,"name":"Washer","size":3})
d2 = dict(id=1948,name="Washer",size=3)
d3 = dict([("id",1948),("name","Washer"),("size",3)])
d4 = dict(zip(("id","name","size"),(1948,"Washer",3)))
d5 = {"id":1948,"name":"Washer","size":3}

d1==d2==d3==d4==d5 #> True 上面五种创建字典的结果是一样的
d1.clear() #>移除d1中所有项
d1.copy() #> 返回d1的浅拷贝
s=["id","name"]
newv=[123,'test']
d2.fromkeys(s) #> 得到字典{"id":None,"name":None}
d2.fromkeys(s,newv) #> {"id":[123,'test'],"name":[123,'test']}
d2.get("id") #> 1948 ，根据key查找value
d2.get("score",100) #> 100，查找key,如果不存在，则返回默认值，字典本身没变
d2.items() #>返回所有(key,value)组成的视图，可迭代查询，不能更改
d2.keys() #返回所有key组成的视图，可迭代查询，不能更改
d2.values() #返回所有value组成的视图，可迭代查询，不能更改
d2.pop("id") #从d2中删除key为id的项，并将对应的value返回，如果key不存在，则报错
d2.pop("id",10) #从d2中删除key为id的项，如果不存在，返回默认值10
d2.popitem() #>返回值是被随机删除的一个key,value组成的字典,d2被改变
d2.setdefault("id",11) #>如果d2中有key为id则返回其value，如果没有，则添加key为id的项
#d2.update(a)  #>将a更新到d2中，a可以是字典，也可以是(key,value)迭代

d = {}.fromkeys("ABC",3) #>{"A":3,"B":3,"C":3} 实际字典是无序的
s = set("ACX")
matches = d.keys() & s #> {"A","C"} 视图与集合之间支持交集
matches = d.keys() | s #> {"A","C","X","B"} 支持并集
matches = d.keys() - s #> {"B"} 支持差集
matches = d.keys() ^ s #> {"X","B"} 返回并集减去交集的结果

#################################################
####用于统计文件中各个单词出现次数的示例
#################################################
import string
import sys
import os

words={}
strip = string.whitespace + string.punctuation + string.digits + "\""
for filename in sys.argv[1:]:
    for line in open(filename,encoding='utf8'):
        for word in line.lower().split():
            word = word.strip(strip)
            if len(word)>2:
                words[word] = words.get(word,0)+1
for word in sorted(words):
    print("'{0}' occurs {1} times".format(word,words[word]))
#################################################

#字典生成器 {key表达式：value表达式 for key,value in iterable if condition}
file_sizes={name:os.path.getsize(name) for name in os.listdir(".") if os.path.isfile(name)}
#字典生成器也可用于key与value的反转,如果失败会产品TypeError异常
inverted_d = {v:k for k,v in d2.items()}

#默认字典 defaultdict，所有方法与字典一样
import collections
words = collections.defaultdict(int)
#使用默认字典时，如果key值不存在，则自动添加该key，设置如图的Int函数的默认值
x=words["xyz"]
#另外一个创建默认值的方法，使用lambda
minus_one_dict = collections.defaultdict(lambda:-1)

#有序字典 collections.OrderedDict以数据的插入顺序进行存储
tasks = collections.OrderedDict()
tasks[8031]="aaa"
tasks[8032]="bbb"
tasks[8033]="ccc"
#使用popitem()来删除并返回最后一个键值项
#使用popitem(last=False)来删除并返回第一个键值项
tasks.popitem(last=False) #> （8031,'aaa')
#将普通字典转给有序字典
d = collections.OrderedDict(sorted(d.items()))

#任意包含__iter__()方法的对象都是迭代对象，每次返回其中一个数据项
x = [-2,9,7,-4,3]
all(x),any(x),len(x),min(x),max(x),sum(x) #> (True,True,5,-4,9,13)
x.append(0)
all(x),any(x),len(x),min(x),max(x),sum(x) #> (False,True,6,-4,9,13)

#实现一个类似于grep命令的脚本，第一个参数是关键字，后面是文件名
# if len(sys.argv)<3:
#     print("usage: {0} word infiles [infile2 [...infileN]]".format(sys.argv[0]))
#     sys.exit()
# word = sys.argv(1)
# for filename in sys.argv[2:]:
#     for lino,line in enumerate(open(filename),start=1):
#         if word in line:
#             print("{0}:{1}:{2:.40}".format(filename,lino,line.rstrip()))

for a,b in zip((1,2,3,),('a','b','c')):
    print(a,b)
#>(1,a),(2,b)
for t in zip(range(4),range(0,10,2),range(1,10,2)):
    print(t)  #>迭代每次前进一个，range(4)最多产生4个，所以结果最多4个值
#>(0,0,1)
#>(1,2,3)
#>(2,4,5)
#>(3,6,7)

其他参考

#python排序算法sorted()函数，可通过key指定排序函数，例如abs
#sorted只能用于所有元素是可比较的

#变量只是对象的的一个引用，如果同时指向同一个对象的两个变量，其中一个改变，另外一个也改变
a=[1,2,3]
b=a
a[1]=4
print(b[1])  #>结果是4
#列表可使用 b = a[:]的方式创建一个新的对象来实现复制
#字典可使用copy函数
#浅拷贝，元素的元素随着新值改变
x=[53,["B","D"]]
y=x[:]
x[0]=40
x[1][1]="Changed"
print(x,y)  #> [40, ['B', 'Changed']] [53, ['B', 'Changed']]
#深拷贝，元素的元素不能改变
import copy 
x1 = [53,["B","D"]]
y1 = copy.deepcopy(x1)
y1[0]=40
y1[1][1]="Changed"
print(x1,y1)  #> [53, ['B', 'D']] [40, ['B', 'Changed']]