使用map函数实现并行处理

2017/05/12 python编程

使用map函数实现并行处理

传统python多线程例子

引入库

import time 
import threading 
import Queue 
import urllib2 

Consumer类

class Consumer(threading.Thread):
    """Worker thread that opens URLs taken from a shared queue.

    The thread keeps pulling items from the queue until it receives the
    string 'quit', which acts as the shutdown sentinel.
    """

    def __init__(self, queue):
        """Store the queue this worker reads its work items from."""
        threading.Thread.__init__(self)
        self._queue = queue

    def run(self):
        """Open each queued URL until the 'quit' sentinel arrives."""
        while True:
            content = self._queue.get()
            if isinstance(content, str) and content == 'quit':
                break
            response = urllib2.urlopen(content)
            # The body is not used here; close the response right away so
            # the underlying connection is released instead of leaked.
            response.close()
        print('Bye byes!')

Producer函数

def Producer():
    """Fetch a fixed list of URLs using a pool of four Consumer threads.

    Queues every URL, then queues one 'quit' sentinel per worker so each
    thread leaves its loop, waits for all workers to finish, and prints the
    elapsed wall-clock time.
    """
    urls = [
        # Every entry needs its own trailing comma: adjacent string literals
        # are silently concatenated into a single (invalid) URL otherwise.
        'http://www.python.org',
        'http://www.yahoo.com',
        'http://www.scala.org',
        'http://www.google.com',
        # etc..
    ]
    queue = Queue.Queue()
    worker_threads = build_worker_pool(queue, 4)
    start_time = time.time()
    # Add the urls to process
    for url in urls:
        queue.put(url)
    # Add the poison pill: one sentinel per worker
    for _ in worker_threads:
        queue.put('quit')
    for worker in worker_threads:
        worker.join()
    print('Done! Time taken: {}'.format(time.time() - start_time))

构建线程池

def build_worker_pool(queue, size):
    """Create and start `size` Consumer threads that all read from `queue`.

    Returns the list of started workers in creation order.
    """
    def launch():
        # Each worker is started right after it is constructed.
        thread = Consumer(queue)
        thread.start()
        return thread

    return [launch() for _ in range(size)]

主函数

# Run the demo only when the file is executed as a script, not on import.
if __name__ == '__main__':
    Producer()

使用map并行

引入并行化map库

  • 多进程

from multiprocessing import Pool

  • 多线程

from multiprocessing.dummy import Pool as ThreadPool

实例化 Pool 对象

# No size given: the pool defaults to one worker per CPU reported by the OS.
pool = ThreadPool()

一行代码

# Calls urllib2.urlopen on each item of urls using the pool's workers and
# collects the return values into a list in input order.
# NOTE(review): urls is not defined at this level in the snippets shown;
# presumably it is a list of URL strings like the one in Producer - confirm.
results = pool.map(urllib2.urlopen, urls)

转载请注明出处:http://attackme.cn/2017/05/12/使用map函数实现并行处理/

Search

    Post Directory