Skip to content

使用requests下载文件并显示网速和下载进度

248 字 · 阅读时间小于 1 分钟

2024-12-01


def get_proxy():
    """Ask the local proxy-pool service for a proxy and return the raw reply.

    Sends a GET request to ``http://127.0.0.1:5010/get/`` and returns the
    response body as text, without any parsing or validation.
    """
    response = requests.get("http://127.0.0.1:5010/get/")
    return response.text


def delete_proxy(proxy):
    """Tell the local proxy-pool service to drop ``proxy``.

    Issues a GET request to the ``/delete/`` endpoint on 127.0.0.1:5010 with
    ``proxy`` interpolated verbatim into the query string. The response is
    ignored and nothing is returned.
    """
    delete_url = f"http://127.0.0.1:5010/delete/?proxy={proxy}"
    requests.get(delete_url)


def _filename_from_disposition(disposition):
    """Return a bare, safe file name parsed from a Content-Disposition value.

    Raises IndexError if the header has no ``filename=`` part, UnicodeError
    if the name cannot be re-decoded as UTF-8, and ValueError if the
    resulting name is empty.
    """
    raw = disposition.split('filename=')[1]
    # Re-interpret the ISO-8859-1 decoded header text as the UTF-8 bytes the
    # server actually sent, so non-ASCII file names survive.
    name = raw.encode('ISO-8859-1').decode("utf8").strip()
    # SECURITY: the original code called eval() on this server-controlled
    # value just to strip the surrounding quotes, which allows arbitrary code
    # execution. The quotes are removed by plain string handling instead.
    if name[:1] in ('"', "'"):
        closing = name.find(name[0], 1)
        name = name[1:closing] if closing != -1 else name[1:]
    else:
        # Unquoted value: stop at the next header parameter, if any.
        name = name.split(';', 1)[0].strip()
    # Drop any directory components so the header cannot escape the target
    # directory (e.g. "../../etc/passwd").
    name = os.path.basename(name)
    if not name:
        raise ValueError("empty file name in Content-Disposition header")
    return name


def downloadFile(isbn, url):
    """Download ``url`` through a pooled proxy into ``/root/sp_data/<isbn>/``.

    The file name is taken from the ``Content-Disposition`` response header
    and the expected size from ``content-length``. While streaming, progress
    (percent) and throughput (MiB/s) are printed roughly every 0.1 seconds.

    Up to 5 attempts are made with the same proxy. A 404 response aborts
    immediately without re-queueing. When every attempt fails, the job is
    pushed back onto the ``isbn_list`` redis list as JSON and the proxy is
    removed from the pool.

    Args:
        isbn: Identifier used as the sub-directory name (converted with str()).
        url: Address of the file to download.

    Returns:
        None. Success and failure are only observable through side effects.
    """
    headers = {'Proxy-Connection': 'keep-alive',
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    retry_count = 5
    proxy = get_proxy()
    while retry_count > 0:
        r = None
        try:
            try:
                r = requests.get(url, timeout=2, proxies={"http": "http://{}".format(proxy)}, headers=headers,
                                 stream=True)
            except Exception:
                # Best effort: any connection problem just costs one attempt.
                retry_count -= 1
                continue
            # Check 404 before the generic non-200 test; in the original
            # order this branch was unreachable and a missing file burned
            # all retries and was re-queued.
            if r.status_code == 404:
                break
            if r.status_code != 200:
                retry_count -= 1
                continue
            try:
                book_name = _filename_from_disposition(r.headers['Content-Disposition'])
                length = float(r.headers['content-length'])
            except (KeyError, IndexError, ValueError, UnicodeError):
                # Missing or malformed headers: treat as a failed attempt.
                retry_count -= 1
                continue
            path = "/root/sp_data/" + str(isbn) + "/"
            os.makedirs(path, exist_ok=True)
            path = path + book_name
            count = 0
            count_tmp = 0
            time1 = time.time()
            # The context manager closes the file even if streaming fails.
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=512):
                    if chunk:
                        f.write(chunk)
                        count += len(chunk)
                        elapsed = time.time() - time1
                        if elapsed > 0.1:
                            p = count / length * 100 if length else 100.0
                            # Divide by the real elapsed time; the window is
                            # only *at least* 0.1 s long.
                            speed = (count - count_tmp) / 1024 / 1024 / elapsed
                            count_tmp = count
                            print(book_name + ': ' + formatFloat(p) + '%' + ' Speed: ' + formatFloat(speed) + 'M/S')
                            time1 = time.time()
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            actual = os.path.getsize(path)
            if length != actual:
                raise IOError("incomplete download: expected {} bytes, got {}".format(int(length), actual))
            return
        except Exception as e:
            print(e)
            retry_count -= 1
        finally:
            # Release the streamed connection on every exit path.
            if r is not None:
                r.close()

    if retry_count <= 0:
        dict_obj = {"isbn": isbn, "href": url}
        print("放回队列")
        redis_obj.rpush("isbn_list", json.dumps(dict_obj))
        delete_proxy(proxy)


def formatFloat(num):
    """Render ``num`` as a fixed-point string with two decimal places."""
    return format(num, '.2f')