BrianSantoso's solution to "livescore18.com web scraping"

Python solution. Remember to run the following first:

pip install urllib3
pip install bs4

(datetime is part of the Python standard library, so there is nothing extra to install for it.) Then you can run the scraper using:

python scraper.py

import urllib3
from bs4 import BeautifulSoup, SoupStrainer
import datetime

http = urllib3.PoolManager()
response = http.request('GET', 'https://www.livescore18.com/data/ft0_2.js?1548375758000/')
soup = BeautifulSoup(response.data, 'lxml')


# helper kept around but not used below
def findAllByClass(soup, tag, class_):
    return soup.findAll(tag, attrs={'class': class_})

# index at which occurrence number n (zero-based) of needle starts in haystack,
# or -1 if there are not enough occurrences
def findnth(haystack, needle, n):
    parts = haystack.split(needle, n + 1)
    if len(parts) <= n + 1:
        return -1
    return len(haystack) - len(parts[-1]) - len(needle)

# target output format, e.g. 24/02/2018|01:00|River Plate:Union de Santa Fe|0-1|1-2

lines = soup.getText().split("\n")[5:348]  # the match rows sit in this hard-coded line range of the feed
data = []
file = open('data.txt', 'w')

for line in lines:
    # pick each field out of the raw feed row by comma position
    team1 = line[findnth(line, ',', 3):findnth(line, ',', 4)][2:-1]
    team2 = line[findnth(line, ',', 4):findnth(line, ',', 5)][2:-1]
    date = line[findnth(line, ',', 5):findnth(line, ',', 8)][2:-1].replace(',', '/')
    time = line[findnth(line, ',', 8):findnth(line, ',', 11)][2:-1]
    full = line[findnth(line, ',', 12):findnth(line, ',', 14)][1:]
    half = line[findnth(line, ',', 14):findnth(line, ',', 16)][1:]

    file.write(date + '|' + time + '|' + team1 + '|' + team2 + '|' + half + '|' + full + '\n')

file.close()
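In case the slice arithmetic is hard to follow, here is a quick, hypothetical sanity check of the findnth helper (the sample string and prints are illustration only): it returns the zero-based index at which occurrence number n of needle starts, with n = 0 meaning the first occurrence, or -1 when there are not enough occurrences.

# hypothetical sanity check of the findnth helper
def findnth(haystack, needle, n):  # same helper as in the scraper above
    parts = haystack.split(needle, n + 1)
    if len(parts) <= n + 1:
        return -1
    return len(haystack) - len(parts[-1]) - len(needle)

sample = 'a,b,c,d'
print(findnth(sample, ',', 0))  # 1  -> index of the first comma (n is zero-based)
print(findnth(sample, ',', 2))  # 5  -> index of the third comma
print(findnth(sample, ',', 3))  # -1 -> there is no fourth comma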
Python solution. Remember to download the dependencies, which you can do by running the following in your cmd prompt:

pip install urllib3
pip install bs4

(time and datetime are part of the Python standard library, so they do not need to be installed.) Then you can run the scraper using:

python scraper.py

import urllib3
import time as TIME
from bs4 import BeautifulSoup, SoupStrainer
from datetime import datetime

# https://www.livescore18.com/
# https://www.livescore18.com/data/bf_en2.js?1548379839000

http = urllib3.PoolManager()
response = http.request('GET', 'https://www.livescore18.com/data/ft0_2.js')
soup = BeautifulSoup(response.data, 'lxml')

# index at which occurrence number n (zero-based) of needle starts in haystack,
# or -1 if there are not enough occurrences
def findnth(haystack, needle, n):
    parts = haystack.split(needle, n + 1)
    if len(parts) <= n + 1:
        return -1
    return len(haystack) - len(parts[-1]) - len(needle)

text = soup.getText()
lines = text.split("\n")

# the feed states how many match rows it contains; read that count
# instead of hard-coding a line range
matchcountStart = findnth(text, '=', 3) + 1
matchcountEnd = findnth(text, ';', 3)
matchcount = text[matchcountStart:matchcountEnd]
matchcount = int(matchcount)
lines = lines[5:5 + matchcount]

data = []
file = open('data.txt', 'w')
file.truncate(0)

for line in lines:
    # pick each field out of the raw feed row by comma position
    team1 = line[findnth(line, ',', 3):findnth(line, ',', 4)][2:-1]
    team2 = line[findnth(line, ',', 4):findnth(line, ',', 5)][2:-1]
    date = line[findnth(line, ',', 5):findnth(line, ',', 8)][2:].replace(',', '/')

    year = 0
    month = ''
    try:
        year = int(date[0:findnth(date, '/', 0)])
        month = date[findnth(date, '/', 0) + 1:findnth(date, '/', 1)]
        month = int(month) + 1  # the feed's months appear to be zero-based
        month = str(month)
        if len(month) < 2:
            month = '0' + month
    except:
        # skip rows that are not match entries
        print(line)
        continue

    day = date[findnth(date, '/', 1) + 1:]
    if len(day) < 2:
        day = '0' + day  # keep the day zero-padded, matching the month

    time = line[findnth(line, ',', 8):findnth(line, ',', 10)][1:]
    hour = time[0:findnth(time, ',', 0)]
    minute = time[findnth(time, ',', 0) + 1:]
    # timezone = line[findnth(line, ',', 17):findnth(line, ',', 18)][1:]
    # timezone = int(timezone)
    # timezone_difference = 2 - (2 * timezone)
    timezone_difference = int(-TIME.timezone / 3600)  # local offset from UTC in hours
    hour = str((int(hour) + timezone_difference) % 24)
    if len(hour) < 2:
        hour = '0' + hour

    finished = line[findnth(line, ',', 11):findnth(line, ',', 12)][1:]
    # print(finished)
    half = line[findnth(line, ',', 12):findnth(line, ',', 14)][1:].replace(',', '-')
    full = line[findnth(line, ',', 14):findnth(line, ',', 16)][1:].replace(',', '-')

    if finished == '-1':  # '-1' appears to mark finished matches
        file.write(str(day) + '/' + str(month) + '/' + str(year) + '|' + hour + ':' + minute + '|' + team1 + ':' + team2 + '|' + half + '|' + full + '\n')

file.close()

Thanks!
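If you want to consume data.txt afterwards, here is a minimal, hypothetical sketch assuming the pipe-separated layout written by the script above (day/month/year|hour:minute|team1:team2|half-time|full-time):

# hypothetical reader for the data.txt produced above; the field layout is
# assumed from the file.write() call: date|time|teams|half-time|full-time
with open('data.txt') as f:
    for row in f:
        date, kickoff, teams, half, full = row.rstrip('\n').split('|')
        home, away = teams.split(':', 1)
        print(date, kickoff, home, 'vs', away, 'HT', half, 'FT', full)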

User: BrianSantoso

Question: livescore18.com web scraping
