Leggere valori da un txt e usarli in script

di il
1 risposte

Leggere valori da un txt e usarli in script

Ho un file txt che contiene i vari link da scraperare 1 link per ogni rigo

https://www.sbostats.com/soccer/stats?country=italy&league=serie-a"e=1.33&direction=home&id=Mzk5OTk4OQ==
https://www.sbostats.com/soccer/stats?country=italy&league=serie-a"e=2.25&direction=away&id=Mzk5OTk5Nw==

Vorrei far ripetere l'operazione di scrape per tutti i link del txt (inserendo il singolo link a mano funziona)

import requests
from bs4 import BeautifulSoup

file1 = open('matches.txt', 'r') 
Lines = file1.readlines()
 
count = 0
# Strips the newline character
for line in Lines:
    count += 1

urls = [
    Lines 
    ]

with requests.Session() as s:
    for url in urls:
        r = s.get(url)
        soup = BeautifulSoup(r.text, 'html.parser')
cookies = {
    'lang': 'en',
    '_gid': 'GA1.2.401836825.1695104715',
    'cookieconsent_status': 'dismiss',
    'g_state': '{"i_l":0}',
    'PHPSESSID': 'Wo7GoJMA3fKhNCJwrPk1cJ-V2KX5Q7Aeu1Wq1GBGPNy416sR',
    'TawkConnectionTime': '0',
    'twk_uuid_60b4abc3de99a4282a1a8168': '%7B%22uuid%22%3A%221.1vWxXwX30BjJHBGBIxFg1FCZvjkirqPSIYVZWsBJsNQcGsbxHWGXqc9w7tGn2ecIUpOyagniCRCVEkqEWMMtrGILRmQmtzR6arkhBh0XGElx8VrpaoMKZqR%22%2C%22version%22%3A3%2C%22domain%22%3A%22asianodds.com%22%2C%22ts%22%3A1695195717866%7D',
    '_gat_gtag_UA_11876182_16': '1',
    '_ga_DM0MPQ7KQF': 'GS1.1.1695192930.3.1.1695195830.0.0.0',
    '_ga': 'GA1.1.490235404.1695104715',
}

headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7,de;q=0.6',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    # 'Cookie': 'lang=en; _gid=GA1.2.401836825.1695104715; cookieconsent_status=dismiss; g_state={"i_l":0}; PHPSESSID=Wo7GoJMA3fKhNCJwrPk1cJ-V2KX5Q7Aeu1Wq1GBGPNy416sR; TawkConnectionTime=0; twk_uuid_60b4abc3de99a4282a1a8168=%7B%22uuid%22%3A%221.1vWxXwX30BjJHBGBIxFg1FCZvjkirqPSIYVZWsBJsNQcGsbxHWGXqc9w7tGn2ecIUpOyagniCRCVEkqEWMMtrGILRmQmtzR6arkhBh0XGElx8VrpaoMKZqR%22%2C%22version%22%3A3%2C%22domain%22%3A%22asianodds.com%22%2C%22ts%22%3A1695195717866%7D; _gat_gtag_UA_11876182_16=1; _ga_DM0MPQ7KQF=GS1.1.1695192930.3.1.1695195830.0.0.0; _ga=GA1.1.490235404.1695104715',
    'Origin': 'https://www.asianodds.com',
    'Referer': 'https://www.asianodds.com/en/in-play',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

data = {
    'events_page': 'live',
    'page': '1',
    'form_data': 'league=0&open_coef=1&sounds%5B%5D=1&sounds%5B%5D=2&sounds%5B%5D=3&sounds%5B%5D=4&sounds%5B%5D=5&sounds%5B%5D=6&columns%5B%5D=7&columns%5B%5D=8&columns%5B%5D=9&columns%5B%5D=10&columns%5B%5D=11&columns%5B%5D=12&columns%5B%5D=13&columns%5B%5D=30&columns%5B%5D=31&columns%5B%5D=32&columns%5B%5D=33&columns%5B%5D=34&columns%5B%5D=35&columns%5B%5D=36&columns%5B%5D=37&columns%5B%5D=38&columns%5B%5D=39&columns%5B%5D=40&columns%5B%5D=41&columns%5B%5D=42&columns%5B%5D=43&columns%5B%5D=44&columns%5B%5D=45&columns%5B%5D=46&columns%5B%5D=47&columns%5B%5D=48&columns%5B%5D=55&columns%5B%5D=56&columns%5B%5D=57&period_ft=1&period_ht=1&period_2nd=1&event_min__sym=&event_min__val=&event_min__val_2=&score_lead__t=1&score_lead__sym=&score_lead__val=&total_score__sym=&total_score__val=&goal_rating=1&home_rating=1&away_rating=1&lc__m_c__p=1&lc__m_c__t=1&lc__m_c__sym=&lc__m_c__val=&lc__m_c__val_2=&lc__m_o__p=1&lc__m_o__t=1&lc__m_o__sym=&lc__m_o__val=&lc__m_o__val_2=&save_load=0&table_nf=1&sport=29&sport_type=1&book=207',
}

#Here i want to change the url from matches.txt and generate a txt with statistics for all matches
response = requests.post(url, cookies=cookies, headers=headers, data=data)

soup = BeautifulSoup(r.text,"lxml")
link  = soup.find_all("div", {"class": "col-xs-6 col-md-4 val_stats"})
print (link)
for star in link:
    print(star.get_text())
    j = star.get_text()
    f = open("statistics.txt", "a")
    f.write(j+"\n")
    f.close()

1 Risposte

  • Re: Leggere valori da un txt e usarli in script

    Ho parzialmente risolto.. ora mi salva tutti i dati di ogni match.. ma perdo il riferimento a quale partita si riferiscono

    import requests
    from bs4 import BeautifulSoup
    
    file1 = open('matches.txt', 'r') 
    Lines = file1.read()
    data_into_list = Lines.split("\n")
    print(data_into_list) 
    count = 0
    # Strips the newline character
    for line in data_into_list:
        length = len(data_into_list)
        print(length)
    
    urls = [
        line
        ]
    #print(urls)
    with requests.Session() as s:
        #for url in range(urls,len(data_into_list)):
        for url in data_into_list:
            #print(url)
            r = s.get(url)
            soup = BeautifulSoup(r.text, 'html.parser')
            soup = BeautifulSoup(r.text,"lxml")
            link  = soup.find_all("div", {"class": "col-xs-6 col-md-4 val_stats"})
            #print (link)
            for star in link:
               print(star.get_text())
               j = star.get_text()
               f = open("statistics.txt", "a")
               f.write(j+"\n")
               f.close()
    cookies = {
        'lang': 'en',
        '_gid': 'GA1.2.401836825.1695104715',
        'cookieconsent_status': 'dismiss',
        'g_state': '{"i_l":0}',
        'PHPSESSID': 'Wo7GoJMA3fKhNCJwrPk1cJ-V2KX5Q7Aeu1Wq1GBGPNy416sR',
        'TawkConnectionTime': '0',
        'twk_uuid_60b4abc3de99a4282a1a8168': '%7B%22uuid%22%3A%221.1vWxXwX30BjJHBGBIxFg1FCZvjkirqPSIYVZWsBJsNQcGsbxHWGXqc9w7tGn2ecIUpOyagniCRCVEkqEWMMtrGILRmQmtzR6arkhBh0XGElx8VrpaoMKZqR%22%2C%22version%22%3A3%2C%22domain%22%3A%22asianodds.com%22%2C%22ts%22%3A1695195717866%7D',
        '_gat_gtag_UA_11876182_16': '1',
        '_ga_DM0MPQ7KQF': 'GS1.1.1695192930.3.1.1695195830.0.0.0',
        '_ga': 'GA1.1.490235404.1695104715',
    }
    
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7,de;q=0.6',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # 'Cookie': 'lang=en; _gid=GA1.2.401836825.1695104715; cookieconsent_status=dismiss; g_state={"i_l":0}; PHPSESSID=Wo7GoJMA3fKhNCJwrPk1cJ-V2KX5Q7Aeu1Wq1GBGPNy416sR; TawkConnectionTime=0; twk_uuid_60b4abc3de99a4282a1a8168=%7B%22uuid%22%3A%221.1vWxXwX30BjJHBGBIxFg1FCZvjkirqPSIYVZWsBJsNQcGsbxHWGXqc9w7tGn2ecIUpOyagniCRCVEkqEWMMtrGILRmQmtzR6arkhBh0XGElx8VrpaoMKZqR%22%2C%22version%22%3A3%2C%22domain%22%3A%22asianodds.com%22%2C%22ts%22%3A1695195717866%7D; _gat_gtag_UA_11876182_16=1; _ga_DM0MPQ7KQF=GS1.1.1695192930.3.1.1695195830.0.0.0; _ga=GA1.1.490235404.1695104715',
        'Origin': 'https://www.asianodds.com',
        'Referer': 'https://www.asianodds.com/en/in-play',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    
    data = {
        'events_page': 'live',
        'page': '1',
        'form_data': 'league=0&open_coef=1&sounds%5B%5D=1&sounds%5B%5D=2&sounds%5B%5D=3&sounds%5B%5D=4&sounds%5B%5D=5&sounds%5B%5D=6&columns%5B%5D=7&columns%5B%5D=8&columns%5B%5D=9&columns%5B%5D=10&columns%5B%5D=11&columns%5B%5D=12&columns%5B%5D=13&columns%5B%5D=30&columns%5B%5D=31&columns%5B%5D=32&columns%5B%5D=33&columns%5B%5D=34&columns%5B%5D=35&columns%5B%5D=36&columns%5B%5D=37&columns%5B%5D=38&columns%5B%5D=39&columns%5B%5D=40&columns%5B%5D=41&columns%5B%5D=42&columns%5B%5D=43&columns%5B%5D=44&columns%5B%5D=45&columns%5B%5D=46&columns%5B%5D=47&columns%5B%5D=48&columns%5B%5D=55&columns%5B%5D=56&columns%5B%5D=57&period_ft=1&period_ht=1&period_2nd=1&event_min__sym=&event_min__val=&event_min__val_2=&score_lead__t=1&score_lead__sym=&score_lead__val=&total_score__sym=&total_score__val=&goal_rating=1&home_rating=1&away_rating=1&lc__m_c__p=1&lc__m_c__t=1&lc__m_c__sym=&lc__m_c__val=&lc__m_c__val_2=&lc__m_o__p=1&lc__m_o__t=1&lc__m_o__sym=&lc__m_o__val=&lc__m_o__val_2=&save_load=0&table_nf=1&sport=29&sport_type=1&book=207',
    }
    
    #Here i want to change the url from matches.txt and generate a txt with statistics for all matches
    #response = requests.post(url, cookies=cookies, headers=headers, data=data)
    
    
    
Devi accedere o registrarti per scrivere nel forum
1 risposte