A. set()
B. {1, 2, 3, 4}
C. {1, 2, 2, 3, 3, 3, 4}
D. [1, 2, 2, 3, 3, 3, 4]
Question 4
The following Python program uses the Pearson correlation coefficient for feature selection on the iris dataset:

    import numpy as np
    from scipy.stats import pearsonr
    from sklearn import datasets

    iris = datasets.load_iris()
    print("Pearson's correlation coefficient between column #1 and target column", pearsonr(iris.data[:, 0], iris.target))
    print("Pearson's correlation coefficient between column #2 and target column", pearsonr(iris.data[:, 1], iris.target))
    print("Pearson's correlation coefficient between column #3 and target column", pearsonr(iris.data[:, 2], iris.target))
    print("Pearson's correlation coefficient between column #4 and target column", pearsonr(iris.data[:, 3], iris.target))

Its output is:

    ("Pearson's correlation coefficient between column #1 and target column", (0.7825612318100814, 2.890478352614054e-32))
    ("Pearson's correlation coefficient between column #2 and target column", (-0.4194462002600275, 9.159984972550002e-08))
    ("Pearson's correlation coefficient between column #3 and target column", (0.9490425448523336, 4.1554775794971695e-76))
    ("Pearson's correlation coefficient between column #4 and target column", (0.9564638238016173, 4.775002368756619e-81))

If one feature is to be dropped, which one should it be?
A. #1
B. #2
C. #3
D. #4
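No reference answer is attached above; the usual rule in correlation-based filtering is to drop the feature whose correlation with the target is weakest in absolute value, which for the output shown is column #2 (|-0.419| is the smallest). A minimal sketch of that rule (the variable names are my own):

    import numpy as np
    from scipy.stats import pearsonr
    from sklearn import datasets

    iris = datasets.load_iris()
    # Absolute Pearson correlation between each feature column and the target.
    scores = [abs(pearsonr(iris.data[:, i], iris.target)[0])
              for i in range(iris.data.shape[1])]
    weakest = int(np.argmin(scores))          # 0-based index of the weakest feature
    print("drop column #%d" % (weakest + 1))  # prints: drop column #2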
Question 7
Reference answer: Correct
1. The site to be crawled consists of the following pages:
(1) books.htm
(2) database.htm
(3) program.htm
(4) network.htm
(5) mysql.htm
(6) python.htm
(7) java.htm
    from bs4 import BeautifulSoup
    import urllib.request

    def spider(url):
        try:
            data = urllib.request.urlopen(url)
            data = data.read()
            data = data.decode()
            soup = BeautifulSoup(data, "lxml")
            print(soup.find("h3").text)
            ____________________________________
            for link in links:
                href = link["href"]
                ___________________________________
                spider(url)  # recursive call
        except Exception as err:
            print(err)

    start_url = "http://127.0.0.1:5000"
    spider(start_url)
    print("The End")
A. links=soup.select("a"); url=start_url+href
B. links=soup.select("li"); url=start_url+"/"+href
C. links=soup.select("a"); url=start_url+"/"+href
D. links=soup.select("li"); url=start_url+href
Reference answer: C
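With the blanks filled according to answer C, a runnable version looks like this (assuming the local test site above is serving at http://127.0.0.1:5000):

    from bs4 import BeautifulSoup
    import urllib.request

    def spider(url):
        try:
            data = urllib.request.urlopen(url)
            data = data.read()
            data = data.decode()
            soup = BeautifulSoup(data, "lxml")
            print(soup.find("h3").text)
            links = soup.select("a")          # blank 1: collect the <a> links
            for link in links:
                href = link["href"]
                url = start_url + "/" + href  # blank 2: hrefs are relative, so join with "/"
                spider(url)                   # recursive call: depth-first traversal
        except Exception as err:
            print(err)

    start_url = "http://127.0.0.1:5000"
    spider(start_url)
    print("The End")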
2. The stack is designed as follows:

    class Stack:
        def __init__(self):
            self.st = []
        def pop(self):
            _____________________
        def push(self, obj):
            self.st.append(obj)
        def empty(self):
            return len(self.st) == 0
A. return self.st.pop(0)
B. return self.st.pop()
C. return st.pop()
D. return st.pop(0)
Reference answer: B
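Answer B pops from the tail of the underlying list, giving last-in first-out behaviour (A would pop from the head, turning it into a queue; C and D forget self). A quick check, assuming the Stack class above with the blank filled:

    st = Stack()
    st.push(1)
    st.push(2)
    st.push(3)
    while not st.empty():
        print(st.pop())  # prints 3, 2, 1 -- last in, first out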
3. Depth-first crawl:

    from bs4 import BeautifulSoup
    import urllib.request

    class Stack:
        def __init__(self):
            self.st = []
        def pop(self):
            return self.st.pop()
        def push(self, obj):
            self.st.append(obj)
        def empty(self):
            return len(self.st) == 0

    def spider(url):
        stack = Stack()
        stack.push(url)
        while not stack.empty():
            url = stack.pop()
            try:
                data = urllib.request.urlopen(url)
                data = data.read()
                data = data.decode()
                soup = BeautifulSoup(data, "lxml")
                print(soup.find("h3").text)
                links = soup.select("a")
                for i in _______________________________:
                    href = links[i]["href"]
                    url = start_url + "/" + href
                    stack.push(url)
            except Exception as err:
                print(err)

    start_url = "http://127.0.0.1:5000"
    spider(start_url)
    print("The End")
A. range(len(links)-1, -1, -1)
B. range(len(links), -1, -1)
C. range(len(links)-1, 0, -1)
D. range(len(links), 0, -1)
Reference answer: A
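Answer A runs the indices from len(links)-1 down to 0 inclusive; B and D start at len(links) and would raise IndexError, while C never reaches index 0. Pushing in reverse means the stack pops the links in their original page order, which plain lists can demonstrate:

    links = ["books.htm", "database.htm", "program.htm"]
    st = []
    for i in range(len(links) - 1, -1, -1):  # push the last link first
        st.append(links[i])
    while st:
        print(st.pop())  # prints books.htm, database.htm, program.htm -- page order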
4. The queue is designed as follows:

    class Queue:
        def __init__(self):
            self.st = []
        def fetch(self):
            return self.st.pop(0)
        def enter(self, obj):
            _________________________________
        def empty(self):
            return len(self.st) == 0
A. self.st.append(obj)
B. self.st.insert(0, obj)
C. st.append(obj)
D. st.insert(0, obj)
Reference answer: A
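Answer A appends at the tail while fetch() pops from the head, giving first-in first-out behaviour. A quick check, assuming the Queue class above with the blank filled:

    q = Queue()
    q.enter(1)
    q.enter(2)
    q.enter(3)
    while not q.empty():
        print(q.fetch())  # prints 1, 2, 3 -- first in, first out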
5. Breadth-first crawl:

    from bs4 import BeautifulSoup
    import urllib.request

    class Queue:
        def __init__(self):
            self.st = []
        def fetch(self):
            return self.st.pop(0)
        def enter(self, obj):
            self.st.append(obj)
        def empty(self):
            return len(self.st) == 0

    def spider(url):
        queue = Queue()
        queue.enter(url)
        while not queue.empty():
            url = queue.fetch()
            try:
                data = urllib.request.urlopen(url)
                data = data.read()
                data = data.decode()
                soup = BeautifulSoup(data, "lxml")
                print(soup.find("h3").text)
                links = soup.select("a")
                for link in links:
                    __________________
                    url = start_url + "/" + href
                    _____________________
            except Exception as err:
                print(err)

    start_url = "http://127.0.0.1:5000"
    spider(start_url)
    print("The End")
A. href=link["href"]; queue.fetch()
B. href=link.href; queue.enter(url)
C. href=link["href"]; queue.enter(url)
D. href=link.href; queue.fetch()
Reference answer: C
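Answer C both reads the href attribute correctly and enqueues the new URL (fetch() would pull a URL off the queue instead of adding one). On a BeautifulSoup tag, attributes are read with dict-style indexing; dot access looks for a child tag of that name:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<a href="books.htm">books</a>', "lxml")
    link = soup.select("a")[0]
    print(link["href"])  # books.htm -- attribute lookup
    print(link.href)     # None -- .href searches for a child <href> tag, which doesn't exist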
6. Visit each page only once, using recursion:

    from bs4 import BeautifulSoup
    import urllib.request

    def spider(url):
        global urls
        if url not in urls:
            ____________________
            try:
                data = urllib.request.urlopen(url)
                data = data.read()
                data = data.decode()
                soup = BeautifulSoup(data, "lxml")
                print(soup.find("h3").text)
                links = soup.select("a")
                for link in links:
                    href = link["href"]
                    _________________________
                    spider(url)
            except Exception as err:
                print(err)

    start_url = "http://127.0.0.1:5000"
    urls = []
    spider(start_url)
    print("The End")
A. urls.append(url); url=start_url+"/"+href
B. urls.append(url); url=start_url+href
C. urls.insert(url,0); url=start_url+"/"+href
D. urls.insert(url,0); url=start_url+href
Reference answer: A
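Answer A appends the URL to the visited list before crawling it; the insert variants have the arguments reversed, since list.insert takes the index first. For example:

    urls = []
    url = "http://127.0.0.1:5000"
    urls.append(url)      # answer A: record the URL as visited
    # urls.insert(url, 0) # TypeError -- list.insert expects (index, object), i.e. insert(0, url)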
7. A program that visits each page only once, using a stack:

    from bs4 import BeautifulSoup
    import urllib.request

    class Stack:
        def __init__(self):
            self.st = []
        def pop(self):
            return self.st.pop()
        def push(self, obj):
            self.st.append(obj)
        def empty(self):
            return len(self.st) == 0

    def spider(url):
        global urls
        stack = Stack()
        stack.push(url)
        while not stack.empty():
            url = stack.pop()
            if url not in urls:
                _______________________
                try:
                    data = urllib.request.urlopen(url)
                    data = data.read()
                    data = data.decode()
                    soup = BeautifulSoup(data, "lxml")
                    print(soup.find("h3").text)
                    links = soup.select("a")
                    for i in _______________________:
                        href = links[i]["href"]
                        url = start_url + "/" + href
                        stack.push(url)
                except Exception as err:
                    print(err)

    start_url = "http://127.0.0.1:5000"
    urls = []
    spider(start_url)
    print("The End")
A. urls.append(url); range(len(links), -1, -1)
B. urls.append(url); range(len(links)-1, 0, -1)
C. urls.insert(url,0); range(len(links)-1, -1, -1)
D. urls.append(url); range(len(links)-1, -1, -1)
Reference answer: D
8. A program that visits each page only once, using a queue:

    from bs4 import BeautifulSoup
    import urllib.request

    class Queue:
        def __init__(self):
            self.st = []
        def fetch(self):
            return self.st.pop(0)
        def enter(self, obj):
            self.st.append(obj)
        def empty(self):
            return len(self.st) == 0

    def spider(url):
        global urls
        queue = Queue()
        queue.enter(url)
        while ________________________:
            url = queue.fetch()
            if url not in urls:
                try:
                    urls.append(url)
                    data = urllib.request.urlopen(url)
                    data = data.read()
                    data = data.decode()
                    soup = BeautifulSoup(data, "lxml")
                    print(soup.find("h3").text)
                    links = soup.select("a")
                    for link in links:
                        ________________
                        url = start_url + "/" + href
                        queue.enter(url)
                except Exception as err:
                    print(err)

    start_url = "http://127.0.0.1:5000"
    urls = []
    spider(start_url)
    print("The End")
A. queue.empty(); href=link["href"]
B. not queue.empty(); href=link["href"]
C. queue.empty(); href=link.href
D. not queue.empty(); href=link.href
Reference answer: B
9. Start a child thread from the main thread to run the reading function.

    import threading
    import time
    import random

    def reading():
        for i in range(10):
            print("reading", i)
            time.sleep(random.randint(1, 2))

    _______________________________
    r.setDaemon(False)
    r.start()
    print("The End")
A. r=threading.Thread(reading)
B. r=threading.Thread(target=reading())
C. r=threading.Thread(target=reading)
D. r=Thread(target=reading)
Reference answer: C
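Answer C passes the function object itself to Thread; option B would call reading() immediately in the main thread and hand its return value (None) to target, and option D fails because Thread was imported via the threading module, not by name. A runnable version (setDaemon is kept to match the course code; current Python prefers r.daemon = False):

    import threading
    import time
    import random

    def reading():
        for i in range(10):
            print("reading", i)
            time.sleep(random.randint(1, 2))

    r = threading.Thread(target=reading)  # pass the function, do not call it
    r.setDaemon(False)                    # non-daemon: the program waits for it
    r.start()
    print("The End")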
10. Start a foreground thread:

    import threading
    import time
    import random

    def reading():
        for i in range(5):
            print("reading", i)
            time.sleep(random.randint(1, 2))

    r = threading.Thread(target=reading)
    __________________
    r.start()
    print("The End")
A. r.setDaemon(True)
B. r.setDaemon(true)
C. r.setDaemon(False)
D. r.setDaemon(false)
Reference answer: A (Python's boolean literals are capitalized, so true/false in B and D are NameErrors; setDaemon(True) marks the thread as a daemon)
11.

    import threading
    import time
    import random

    def reading():
        for i in range(5):
            print("reading", i)
            time.sleep(random.randint(1, 2))

    def test():
        r = threading.Thread(target=reading)
        r.setDaemon(True)
        r.start()
        print("test end")

    t = threading.Thread(target=test)
    t.setDaemon(False)
    t.start()
    print("The End")

Program result:

    The End
    reading 0
    test end

Do you think this result is possible?
Reference answer: Correct (r is a daemon thread, so the program can exit as soon as the main thread and test finish, leaving only one "reading" line printed)
12.

    import threading
    import time
    import random

    def reading():
        for i in range(5):
            print("reading", i)
            time.sleep(random.randint(1, 2))

    t = threading.Thread(target=reading)
    t.setDaemon(False)
    t.start()
    t.join()
    print("The End")

Program result:

    reading 0
    reading 1
    reading 2
    reading 3
    reading 4
    The End

Do you think this result is possible?
Reference answer: Correct (t.join() blocks the main thread until reading finishes, so all five "reading" lines precede "The End")
13.

    import threading
    import time
    import random

    def reading():
        for i in range(5):
            print("reading", i)
            time.sleep(random.randint(1, 2))

    def test():
        r = threading.Thread(target=reading)
        r.setDaemon(True)
        r.start()
        r.join()
        print("test end")

    t = threading.Thread(target=test)
    t.setDaemon(False)
    t.start()
    t.join()
    print("The End")

Program result:

    reading 0
    reading 1
    reading 2
    reading 3
    reading 4
    test end
    The End

Do you think this result is possible?
Reference answer: Correct (the join() calls make test wait for reading and the main thread wait for test, so the output is fully ordered)
14.

    import threading
    import time

    lock = threading.RLock()
    words = ["a", "b", "d", "b", "p", "m", "e", "f", "b"]

    def increase():
        global words
        for count in range(5):
            lock.acquire()
            print("A acquired")
            # sort words into ascending order
            for i in range(len(words)):
                for j in range(i + 1, len(words)):
                    if words[j] < words[i]:
                        t = words[i]
                        words[i] = words[j]
                        words[j] = t
            print("A ", words)
            time.sleep(1)
            lock.release()

    def decrease():
        global words
        for count in range(5):
            lock.acquire()
            print("D acquired")
            # sort words into descending order
            for i in range(len(words)):
                for j in range(i + 1, len(words)):
                    if words[j] > words[i]:
                        t = words[i]
                        words[i] = words[j]
                        words[j] = t
            print("D ", words)
            time.sleep(1)
            lock.release()

    A = threading.Thread(target=increase)
    A.setDaemon(False)
    A.start()
    D = threading.Thread(target=decrease)
    D.setDaemon(False)
    D.start()
    print("The End")

Program result:

    A acquired
    A  ['a', 'b', 'b', 'b', 'd', 'e', 'f', 'm', 'p']
    The End
    D acquired
    D  ['p', 'm', 'f', 'e', 'd', 'b', 'b', 'b', 'a']
    D acquired
    D  ['p', 'm', 'f', 'e', 'd', 'b', 'b', 'b', 'a']
    A acquired
    A  ['a', 'b', 'b', 'b', 'd', 'e', 'f', 'm', 'p']
    A acquired
    A  ['a', 'b', 'b', 'b', 'd', 'e', 'f', 'm', 'p']
    D acquired
    D  ['p', 'm', 'f', 'e', 'd', 'b', 'b', 'b', 'a']
    D acquired
    D  ['p', 'm', 'f', 'e', 'd', 'b', 'b', 'b', 'a']
    D acquired
    D  ['p', 'm', 'f', 'e', 'd', 'b', 'b', 'b', 'a']
    A acquired
    A  ['a', 'b', 'b', 'b', 'd', 'e', 'f', 'm', 'p']
    A acquired
    A  ['a', 'b', 'b', 'b', 'd', 'e', 'f', 'm', 'p']

Do you think this result is possible?
Reference answer: Correct (the lock serializes the two sorters, but which thread acquires it in each round is up to the scheduler, so this interleaving is possible)
15.

    from bs4 import UnicodeDammit

    dammit = UnicodeDammit(data, ["utf-8", "gbk"])
    data = dammit.unicode_markup

This can identify the encoding of data automatically with 100% accuracy.
Reference answer: Wrong (encoding detection is heuristic, so it cannot be 100% reliable)
16.

    import urllib.request

    url = "http://www.weather.com.cn/weather/101280601.shtml"
    headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre"}
    req = urllib.request.Request(url, headers=headers)
    data = urllib.request.urlopen(req)
    data = data.read()

The purpose of headers here is to make the request imitate a browser.
Reference answer: Correct
17.

    from bs4 import BeautifulSoup

    doc = "<div><p>A</p><span><p>B</p></span><p>C</p></div>"
    soup = BeautifulSoup(doc, "lxml")
    tags = soup.select("div > p")
    for tag in tags:
        print(tag)

Program result:

    <p>A</p>
Reference answer: Wrong ("div > p" selects the direct <p> children of <div>, so the program prints <p>C</p> as well as <p>A</p>)
18.

    from bs4 import BeautifulSoup

    doc = "<div><p>A</p><span><p>B</p></span><p>C</p></div>"
    soup = BeautifulSoup(doc, "lxml")
    tags = soup.select("div p")
    for tag in tags:
        print(tag)

Program result:

    <p>A</p>
Reference answer: Wrong ("div p" selects every <p> descendant of <div>, so all three paragraphs are printed)
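Assuming the doc string reconstructed above, a quick check of both selectors side by side:

    from bs4 import BeautifulSoup

    doc = "<div><p>A</p><span><p>B</p></span><p>C</p></div>"
    soup = BeautifulSoup(doc, "lxml")
    print(soup.select("div > p"))  # [<p>A</p>, <p>C</p>] -- direct children of <div> only
    print(soup.select("div p"))    # [<p>A</p>, <p>B</p>, <p>C</p>] -- every descendant <p>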
19. soup.select("body [class] a") finds every <a> node under nodes that carry a class attribute, under <body>.
Reference answer: Correct
20. soup.select("body [class] ") finds every node with a class attribute under <body>.
Reference answer: Correct
21. soup.select("body head title") finds <title> nodes under <head> under <body>.
Reference answer: Correct
22. soup.select("a[id='link1']") finds the <a> nodes whose attribute id="link1".
Reference answer: Correct
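A small made-up document exercising all four selectors above (the tags and attribute values here are my own invention):

    from bs4 import BeautifulSoup

    doc = """<html><head><title>demo</title></head>
    <body>
      <div class="box">
        <a id="link1" href="a.htm">first</a>
        <a id="link2" href="b.htm">second</a>
      </div>
      <p>no class attribute here</p>
    </body></html>"""
    soup = BeautifulSoup(doc, "lxml")
    print(soup.select("body [class] a"))   # both <a> tags: descendants of the node carrying class
    print(soup.select("body [class]"))     # [<div class="box">...]
    print(soup.select("a[id='link1']"))    # only the first <a>
    print(soup.select("body head title"))  # [] here, since <head> is not inside <body>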
1.

    import scrapy

    class MySpider(scrapy.Spider):
        name = "mySpider"

        def start_requests(self):
            url = 'http://127.0.0.1:5000'
            _________________________________________

        def parse(self, response):
            print(response.url)
            data = response.body.decode()
            print(data)
A. yield scrapy.Request(url=url, callback=self.parse)
B. yield scrapy.Request(url=url, callback=parse)
C. return scrapy.Request(url=url, callback=self.parse)
D. return scrapy.Request(url=url, callback=parse)
Reference answer: A (start_requests must produce an iterable of Request objects, so the Request is yielded, and the callback must be referenced as self.parse)
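A spider like this can be run without a full scrapy project via CrawlerProcess; a minimal sketch, assuming scrapy is installed and something is serving http://127.0.0.1:5000:

    from scrapy.crawler import CrawlerProcess

    # Run MySpider (defined above) inside this script.
    process = CrawlerProcess()
    process.crawl(MySpider)
    process.start()  # blocks until the crawl finishes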
1.

    from scrapy.selector import Selector

    htmlText = '''
    <html><body>
    <bookstore>
    <book>
        <title lang="eng">Harry Potter</title>
        <price> 29.99 </price>
    <book>
        <title lang="eng">Learning XML</title>
        <price> 39.95 </price>
    </book>
    '''
    selector = Selector(text=htmlText)
    print(type(selector))
    print(selector)
    _______________
    print(type(s))
    print(s)

Find all of the <title> elements.
A. s=selector.xpath("title")
B. s=selector.xpath("//title")
C. s=selector.xpath("/title")
D. s=selector.xpath("///title")
Reference answer: B ("//title" selects every <title> element anywhere in the document)
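The distinction being tested is that "//" searches from anywhere in the document, while a single leading "/" is an absolute path from the root. A small sketch of the difference (the tag names are made up for illustration):

    from scrapy.selector import Selector

    s = Selector(text="<a><b>x</b></a>")
    print(s.xpath("//b").get())  # '<b>x</b>'  -- // matches at any depth
    print(s.xpath("/b").get())   # None        -- /b asks for a root element named b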
1.

    from scrapy.selector import Selector

    htmlText = '''
    <html>
    <body>
    <bookstore>
    <title>books</title>
    <book>
        <title>Novel</title>
        <title lang="eng">Harry Potter</title>
        <price> 29.99 </price>
    </book>
    <book>
        <title>TextBook</title>
        <title lang="eng">Learning XML</title>
        <price> 39.95 </price>
    </book>
    '''
    selector = Selector(text=htmlText)
    _____________________________________
    for e in s:
        print(e)

Program result:

    <Selector xpath='//book/title' data='<title>Novel</title>'>
    <Selector xpath='//book/title' data='<title lang="eng">Harry Potter</title>'>
    <Selector xpath='//book/title' data='<title>TextBook</title>'>
    <Selector xpath='//book/title' data='<title lang="eng">Learning XML</title>'>
A. s=selector.xpath("/book").xpath("./title")
B. s=selector.xpath("//book").xpath("./title")
C. s=selector.xpath("//book").xpath("/title")
D. s=selector.xpath("/book").xpath("/title")
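No reference answer is attached here, but the output shown matches option B: "//book" selects every <book> element anywhere in the document, and the chained "./title" is evaluated relative to each of those books (a leading "/", as in C and D, would restart from the document root and match nothing; likewise "/book" in A finds nothing, because <book> is not the root element). The missing line would presumably be:

    s = selector.xpath("//book").xpath("./title")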
Question 8
A. >>> 'Life is short, you need Python.'.find('you') 15
B. >>> seq = [1, 2, 3, 4]; >>> sep = '+'; >>> sep.join(seq) '1+2+3+4'
C. >>> print('{:5.3f}'.format(math.pi)) 3.1416
D. >>> print('you' in 'Life is short, you need Python.') True