def _getDate(self, date): """ Helper function used to format url in the desired date in getMatches() :param date: datetime.date object :return: The formatted year, month and day of the date object """ year = str(date.year) month = str(date.month) if date.month >= 10 else'0' + str(date.month) day = str(date.day) if date.day >= 10 else'0' + str(date.day) return year, month, day
# Every competition on the page is rendered inside a 'table_wrapper' div.
compLinkPattern = re.compile('^/en/comps/')

# Keep only wrappers that contain a competition link. find() returns None when
# no such link exists, which is what previously surfaced as AttributeError.
# Filtering into a new list avoids deleting by index while the indices shift:
# with two or more bad tables the old ascending `del` removed the wrong
# entries or ran past the end of the list.
championshipTables = [
    wrapper
    for wrapper in bs.find_all('div', {'class': 'table_wrapper'})
    if wrapper.find('a', {'href': compLinkPattern}) is not None
]

# A table is wanted when the text of its competition link is one of `leagues`.
desiredTables = [
    wrapper
    for wrapper in championshipTables
    if wrapper.find('a', {'href': compLinkPattern}).get_text() in leagues
]
for table in desiredTables:
    # Fixed 4 second pause before each table is processed
    # (presumably to throttle the scraper - confirm).
    time.sleep(4)
    matchesLinks = []

    # One home-team cell per match; `day` is recorded once per match as well.
    homeTeams = table.find_all('td', {'data-stat': 'home_team'})
    for homeCell in homeTeams:
        self.homeTeams.append(homeCell.get_text())
        self.dates.append(day)

    awayTeams = table.find_all('td', {'data-stat': 'away_team'})
    self.awayTeams.extend(awayCell.get_text() for awayCell in awayTeams)

    # The score cell supplies both the parsed score and the match link.
    # NOTE(review): a score cell without an <a> makes find() return None and
    # the ['href'] lookup raise TypeError - confirm such rows cannot occur.
    scores = table.find_all('td', {'data-stat': 'score'})
    for scoreCell in scores:
        scoreHome, scoreAway = self._getScore(scoreCell.get_text())
        self.scoresHome.append(scoreHome)
        self.scoresAway.append(scoreAway)
        matchesLinks.append(scoreCell.find('a', {'href': re.compile('^/')})['href'])

    # xG cells are optional: when the table has none, pad both xG lists with
    # NaN, one entry per home-team cell, so all per-match lists grow together.
    homeXG = table.find_all('td', {'data-stat': 'home_xg'})
    if homeXG:
        awayXG = table.find_all('td', {'data-stat': 'away_xg'})
        self.homeXG.extend(xgCell.get_text() for xgCell in homeXG)
        self.awayXG.extend(xgCell.get_text() for xgCell in awayXG)
    else:
        placeholders = [np.nan] * len(homeTeams)
        self.homeXG.extend(placeholders)
        self.awayXG.extend(placeholders)

    # Append one row of match stats per collected link.
    for matchUrl in matchesLinks:
        dfMatchStats.loc[len(dfMatchStats)] = self._getMatchStats(matchUrl)
def _getMatchStats(self, url):
    """
    Helper function to extract the match stats for each match in getMatches()

    Downloads the match report page and builds one flat list made of, in
    order: up to 11 home player names, up to 11 away player names, the
    yellow/red card counts for home then away, and the values of every entry
    in ``stats``. When the page has no home lineup block, NaN placeholders
    are used for the players and the card counts.

    :param url: The match report url - is extracted in getMatches(); it is
        appended to ``self.originLink``
    :return: List with match stats
    """
    # NOTE(review): `stats` is read and updated below but is not assigned in
    # the visible part of this function - confirm where it is defined and
    # that it is reset between matches.
    matchStatsList = []

    # Close the HTTP response as soon as the page has been parsed.
    with urlopen(self.originLink + url) as htmlMatch:
        bsMatch = BeautifulSoup(htmlMatch.read(), 'html.parser')

    homeLineup = bsMatch.find('div', {'class': 'lineup', 'id': 'a'})
    if not homeLineup:
        # No lineup on the page: 11 NaN players per side, 4 NaN card counts,
        # then whatever values `stats` currently holds.
        homePlayers = [np.nan] * 11
        awayPlayers = [np.nan] * 11
        matchStatsList.extend([np.nan] * 4)
        for value in stats.values():
            matchStatsList.extend(value)
        return homePlayers + awayPlayers + matchStatsList

    # The first 11 player links of each lineup block are used as the players.
    playerLinkPattern = re.compile('^/en/players')
    homePlayers = [
        player.get_text()
        for player in homeLineup.find_all('a', {'href': playerLinkPattern})[0:11]
    ]
    awayLineup = bsMatch.find('div', {'class': 'lineup', 'id': 'b'})
    awayPlayers = [
        player.get_text()
        for player in awayLineup.find_all('a', {'href': playerLinkPattern})[0:11]
    ]

    def countCards(cardsPanel):
        # A 'yellow_red_card' span is counted both as a yellow and as a red.
        secondYellows = len(cardsPanel.find_all('span', {'class': 'yellow_red_card'}))
        yellows = len(cardsPanel.find_all('span', {'class': 'yellow_card'})) + secondYellows
        reds = len(cardsPanel.find_all('span', {'class': 'red_card'})) + secondYellows
        return yellows, reds

    # The first 'cards' div is used for the home side, the second for away.
    matchCards = bsMatch.find_all('div', {'class': 'cards'})
    yellowCardsHome, redCardsHome = countCards(matchCards[0])
    yellowCardsAway, redCardsAway = countCards(matchCards[1])
    matchStatsList.extend([yellowCardsHome, redCardsHome, yellowCardsAway, redCardsAway])

    # In the extra-stats panel a label that is a key of `stats` takes the
    # integers in the cells immediately before and after it
    # (presumably home value, label, away value - confirm).
    extraStatsPanel = bsMatch.find("div", {"id": "team_stats_extra"})
    for statColumn in extraStatsPanel.find_all("div", recursive=False):
        columnValues = [cell.get_text() for cell in statColumn.find_all("div")]
        for index, value in enumerate(columnValues):
            if not value.isdigit() and value in stats:
                stats[value] = [int(columnValues[index - 1]), int(columnValues[index + 1])]
    for value in stats.values():
        matchStatsList.extend(value)

    # Same row layout as the early-exit branch above.
    return homePlayers + awayPlayers + matchStatsList
# Wait up to 5 seconds for an element with class 'fvgWCd' to become visible,
# then collect every 'js-list-cell-target' element as `matches`. For each one,
# when self._checkExistsByClass('blXay') returns a truthy value, the text of
# its 'blXay' element is appended to homeTeam and the text of its 'crsngN'
# element to awayTeam.
# NOTE(review): _checkExistsByClass is called without `match`, so it presumably
# checks the whole page rather than this row - confirm.
WebDriverWait(self.driver, 5).until(EC.visibility_of_element_located((By.CLASS_NAME, 'fvgWCd'))) matches = self.driver.find_elements(By.CLASS_NAME, 'js-list-cell-target') for match in matches: if self._checkExistsByClass('blXay'): homeTeam.append(match.find_element(By.CLASS_NAME, 'blXay').text) awayTeam.append(match.find_element(By.CLASS_NAME, 'crsngN').text)
# When the 'haEAMa' element of `match` shows '-', the text of every 'fvgWCd'
# element inside `match` is appended to odds.
# NOTE(review): presumably '-' marks a match without a result yet and the
# 'fvgWCd' elements hold the odds - confirm.
if match.find_element(By.CLASS_NAME, 'haEAMa').text == '-': oddsObject = match.find_elements(By.CLASS_NAME, 'fvgWCd') for odd in oddsObject: odds.append(odd.text)
# One column per starting player: homePlayer1..homePlayer11 followed by
# awayPlayer1..awayPlayer11 (22 columns, home side first).
playerColumns = [
    "{team}Player{i}".format(team=team, i=number)
    for team in ("home", "away")
    for number in range(1, 12)
]
df = pd.DataFrame(columns=playerColumns)
# Assigning an empty list adds the "homeTeam" column without adding any rows.
df["homeTeam"] = []
# Proceed only when an element with class "jwanNG" exists and its text is
# exactly "LINEUPS".
if self._checkExistsByClass("jwanNG") and self.driver.find_element(By.CLASS_NAME, "jwanNG").text == "LINEUPS":
# Click the "jwanNG" element, wait up to 20 seconds for a "kDQXnl" element to
# become visible, then take the accessible_name of the third "sc-eDWCr"
# element inside each "kDQXnl" element as a player name. Every name is passed
# through self._isCaptain(), and nameInPanel is appended last.
# NOTE(review): nameInPanel is not assigned in the visible code - confirm
# where it comes from.
lineupButton = self.driver.find_element(By.CLASS_NAME, "jwanNG") lineupButton.click() # wait until players are available WebDriverWait(self.driver, 20).until(EC.visibility_of_element_located((By.CLASS_NAME, "kDQXnl"))) players = self.driver.find_elements(By.CLASS_NAME, "kDQXnl") playerNames=[] for player in players: playerNames.append(player.find_elements(By.CLASS_NAME, "sc-eDWCr")[2].accessible_name) playerNames = [self._isCaptain(playerName) for playerName in playerNames] playerNames.append(nameInPanel)