题目.png

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


# Pages passed one by one to fetch_and_parse when the file runs as a script.
url_list = [
    'https://www.sdtbu.edu.cn/info/1043/35641.htm',
    'https://www.sdtbu.edu.cn/info/1043/35612.htm',
]

def _format_size(size_bytes):
    """Return *size_bytes* formatted with two decimals, in MB from 1 MiB up, else KB."""
    if size_bytes >= 1024 * 1024:
        return f"{size_bytes / (1024 * 1024):.2f} MB"
    return f"{size_bytes / 1024:.2f} KB"


def _remote_size(resource_url, timeout):
    """Return the size in bytes of the resource at *resource_url*.

    The Content-Length header is used when it is present and numeric;
    otherwise the body is streamed and its length counted.

    Raises:
        requests.exceptions.RequestException: on connection errors, timeouts
            or a non-2xx status.
    """
    # The context manager closes the streamed response, releasing the
    # connection even when the body is never read.
    with requests.get(resource_url, stream=True, timeout=timeout) as img_response:
        img_response.raise_for_status()
        content_length = img_response.headers.get('content-length')
        if content_length:
            try:
                return int(content_length)
            except ValueError:
                # Malformed header: fall through and measure the body instead.
                pass
        # Count the body chunk by chunk so a large image is never held in
        # memory all at once.
        return sum(
            len(chunk)
            for chunk in img_response.iter_content(chunk_size=64 * 1024)
        )


def fetch_and_parse(url, timeout=5):
    """Print the title of the page at *url* and the size of each image in it.

    The page is fetched and decoded as UTF-8. Only ``<img>`` tags inside the
    first ``<div class="content-box">`` are considered. Each image ``src`` is
    resolved against *url* and its size is printed in KB or MB. A failure on
    one image is printed and does not stop the remaining images.

    Args:
        url: Address of the page to inspect.
        timeout: Timeout in seconds passed to every HTTP request.

    Returns:
        None. All results are written to standard output.

    Raises:
        requests.exceptions.RequestException: if the page itself cannot be
            fetched or answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    # A page without a <title> must not abort the whole run.
    title_tag = soup.find('title')
    print("页面标题:", title_tag.text if title_tag else "未找到")

    content_box = soup.find('div', class_='content-box')
    if not content_box:
        print("未找到class为'content-box'的div元素")
        return

    img_tags = content_box.find_all('img')
    if not img_tags:
        print("在content-box中未找到任何图片")
        return

    print(f"共找到{len(img_tags)}张图片。")

    for i, img in enumerate(img_tags, 1):
        img_url = img.get('src')
        if not img_url:
            print(f"图片 {i}: 未找到src属性,跳过")
            continue
        # src may be relative; resolve it against the page address.
        full_img_url = urljoin(url, img_url)

        try:
            size_bytes = _remote_size(full_img_url, timeout)
        except requests.exceptions.RequestException as e:
            print(f"下载图片失败: {str(e)}")
        except Exception as e:
            # Best effort: any other per-image problem is reported and the
            # loop moves on to the next image.
            print(f"处理图片时发生错误: {str(e)}")
        else:
            print(f"图片{i}: {_format_size(size_bytes)}({full_img_url})")



if __name__ == "__main__":
    # Process every configured page. A page that cannot be fetched is reported
    # and skipped so that the remaining URLs are still processed.
    for url in url_list:
        try:
            fetch_and_parse(url)
        except requests.exceptions.RequestException as e:
            print(f"获取页面失败: {url} ({e})")