AssEdbeta: the semi-automatic assistant editor
$Html... time elapsed: $Time seconds.
END; exit; } /******************************************************************************************************************************************** * Function: SaveReload() * Saves variables and reloads the page to move on to the next stage */ function SaveReload() { $_SESSION['Stage'] = $this->Stage; $_SESSION['PostData'] = $this->PostData; $_SESSION['QueryKey'] = $this->QueryKey; $_SESSION['StickyHtml'] = $this->StickyHtml; if (isset($this->WebEnv)) $_SESSION['WebEnv'] = $this->WebEnv; if (isset($this->AuthorList)) $_SESSION['AuthorList'] = $this->AuthorList; if (isset($this->AllResults)) $_SESSION['AllResults'] = $this->AllResults; if (isset($this->CurResults)) $_SESSION['CurResults'] = $this->CurResults; if ($this->Stage[0] == "searches") { foreach ($this->PostData['Searches'] as $k => $Search) { if ($this->Stage[1] > $k) $this->Html .= "\n" . $Search['SearchString'] . ": 100% (" . $Search['Count'] . " / " . $Search['Count'] . ")
\n"; elseif (($this->Stage[1] == $k) and !empty($Search['Count'])) { $width = ($this->Stage[2] / $Search['Count']) * 600; $pc = round(($width / 6), 2); $width = round($width); $width2 = 600 - $width; $this->Html .= "\n" . $Search['SearchString'] . ": " . $pc . "% (" . $this->Stage[2] . " / " . $Search['Count'] . ")
\n"; } else $this->Html .= "\n" . $Search['SearchString'] . ": 0%
\n"; } $this->Html .= "\nPost-search filters: 0%
\n"; } else { if (empty($_SESSION['Filtered']) and isset($_SESSION['ToFilter'])) $_SESSION['Filtered'] = $_SESSION['ToFilter'] - count($this->AllResults[1]); foreach ($this->PostData['Searches'] as $k => $Search) { $this->Html .= "\n" . $Search['SearchString'] . ": 100% (" . $Search['Count'] . " / " . $Search['Count'] . ")
\n"; } if (isset($_SESSION['Filtered']) and isset($_SESSION['ToFilter'])) { $width = ($_SESSION['Filtered'] / $_SESSION['ToFilter']) * 600; $pc = round(($width / 6), 2); $width = round($width); $width2 = 600 - $width; $this->Html .= "\nApplying limits and filters: " . $pc . "%
\n"; } else $this->Html .= "\nPost-search filters: 0%
\n"; } $this->Title = "AssEd: Processing searches..."; $this->OutputHtml(true); } /******************************************************************************************************************************************** * Function: PrintResults() * Prints the search results */ function PrintResults() { $_SESSION['Stage'] = $this->Stage; // Save these two variables into the session to enable re-ordering if (isset($this->AuthorList)) $_SESSION['AuthorList'] = $this->AuthorList; $this->Title = "AssEd: Search complete"; if (empty($this->AuthorList)) $this->Html .= "Sorry, it would appear that nobody has that particularly combination of skills.
\n"; else { $this->Html .= "| Author | \n"; // Print the column headers $i = 0; foreach ($this->PostData['Searches'] as $Search) { $i += 1; $this->Html .= "" . $Search['SearchString'] . "↓ | \n"; } // Check the limits if ($this->PostData['Active-All']) $this->Html .= "All pubs ↓ | \n"; if ($this->PostData['Active-5yr']) $this->Html .= "5yr ↓ | \n"; if ($this->PostData['Active']) $this->Html .= "1yr ↓ | \n"; if ($this->PostData['Active-Senior']) $this->Html .= "Last Au ↓ | \n"; $this->Html .= "
|---|---|---|---|---|---|
| " . $Author . " | \n"; // Loop through the search terms for ($i = 1; $i <= $j; $i++) { $this->Html .= "PostData['Searches'][$i]['SearchString']) . ")\">" . $Items[$i] . " | \n"; } // Check the limits if ($this->PostData['Active-All']) $this->Html .= "" . $Items['All'] . " | \n"; if ($this->PostData['Active-5yr']) { $t = array(date('Y'), date('m'), date('d')); $pdat2 = implode("/", $t); $t[0] -= 5; $pdat1 = implode("/", $t); $this->Html .= "" . $Items['5yr'] . " | \n"; } if ($this->PostData['Active']) { $t = array(date('Y'), date('m'), date('d')); $pdat2 = implode("/", $t); $t[0] -= 1; $pdat1 = implode("/", $t); $this->Html .= "" . $Items['1yr'] . " | \n"; } if ($this->PostData['Active-Senior']) $this->Html .= "" . $Items['Senior'] . " | \n"; $this->Html .= "
Warning: caught an exception when trying to retreive the XML data from PubMed. I am going to reload the page and hope that the problem goes away. The error given was: " . $e->getMessage() . "
\n"; $this->SaveReload(); } $this->Xml = $this->Connection->GetResponseBody(); // Convert the response into a string, saved in the global 'Xml' if (strpos($this->Xml, "Warning: PubMed returned error #1. I am going to try to refresh the PubMed session variable, then reload the page and try again.
\n"; $this->PubmedIds(); // if we get this error, the WebEnv session ID has probably expired, so it is time $this->SaveReload(); // to get the webenv again, and do a reload } $f = fopen($filename, 'w'); // Cache the new file fwrite($f, $this->Xml); fclose($f); if (!file_exists($filename)) $this->Html .= "Warning: could not save cache file (" . $filename . ")
"; return true; } /**************************************************************************************************************************************** * Function: PubmedSearch() * I broke this out from AssEd() because I was using the same bit of code three times. This checks whether we've done the initial * PubmedIds() search, and if not, runs it. Then it's just the ExtractAuthors. * AssEd is actually much more efficiently arranged now, so it does not actually need to be separate, but, hey, it's neater this way. * The cache updater section of this function was originally a separate function, but I kept making it more code efficient, and eventually merged. */ function PubmedSearch() { Global $ncbi_CachePath; /***/ // Set some important variables $this->SearchTerm = trim($this->PostData['Searches'][$this->Stage[1]]['SearchString']); $this->MinDate = trim($this->PostData['Searches'][$this->Stage[1]]['Since']); // if the count and translation haven't been set, or the PubMed WebEnv variable is missing get them now. if (($this->Stage[2] == 1) OR (empty($this->WebEnv)) OR (empty($this->Translation))) $this->PubmedIds(true); if ($this->PostData['Searches'][$this->Stage[1]]['Count'] > 1000000) ini_set('memory_limit', '1024M'); // Make sure we have enough memory to deal with all of the results $this->Translation = $this->PostData['Searches'][$this->Stage[1]]['Translation']; /***/ // Check if we have a cached author list /***** TEMP FIX ******/ // This is to update cache file names from v0.4b to v0.5b $o = $ncbi_CachePath . "author_lists/" . trim($this->MinDate, ".") . "," . str_replace("/", "", trim($this->SearchTerm, ".")) . ".txt"; $c = $this->GetCacheName(); // When the old cache file names have been updated, the line above and the line below can be removed if (file_exists($o) and !file_exists($c)) { $this->StickyHtml .= "Notice: Updated old cache file from v0.4 to v0.5 for '{$this->SearchTerm}'.
\n"; rename($o, $c); } /*** END TEMP FIX ***/ if (($s = $this->CheckCache()) !== 0) { // See if we already have this search cached $Attribs = $this->GetCacheAttribs(); // If the cache is up-to-date, we can fetch the attributes and then exit this search ready for the next one $this->PostData['Searches'][$this->Stage[1]]['Count'] = $Attribs['Count']; if ($s == 2) { // If the cache is old, we'll have to update it /*** CACHE UPDATER ***/ $this->CurResults = $this->GetCacheResults(); // First, extract the old cache results $old_count = count($this->CurResults); $updated = getdate($Attribs['Updated']); $updated = array($updated['year'], $updated['mon'], 01); $now = getdate(); $now = array($now['year'], $now['mon'], 01); $this->PubmedIDs(true, implode("/", $updated), implode("/", $now), false); // Add the new items to them if (!empty($this->PostData['Searches'][$this->Stage[1]]['Count'])) $this->PubmedExtractAuthors(true, false); $added = count($this->CurResults) - $old_count; $added_papers = $this->PostData['Searches'][$this->Stage[1]]['Count']; $updated[0] -= $this->MinDate; $now[0] -= $this->MinDate; $this->PubmedIDs(true, implode("/", $updated), implode("/", $now), false); // And remove the old ones if (!empty($this->PostData['Searches'][$this->Stage[1]]['Count'])) $this->PubmedExtractAuthors(true, true); $removed = ($old_count + $added) - count($this->CurResults); $removed_papers = $this->PostData['Searches'][$this->Stage[1]]['Count']; $this->PubMedIDs(); // Run a normal PubMedIDs in order to get the new hit count $this->SaveAuthors(); $new_count = count($this->CurResults); $this->StickyHtml .= "Notice: updated cache for {$this->SearchTerm}: added $added counts for $added_papers papers and removed $removed counts for $removed_papers papers. New total is $new_count authors from {$this->PostData['Searches'][$this->Stage[1]]['Count']} papers.
\n"; unset($this->CurResults); } unset($this->Translation, $this->WebEnv); return true; } /***/ // Otherwise, run the search if ($this->PubmedExtractAuthors() === false) exit; /***/ // We will only ever get this far if the search has completed // - so, we can now cache the author list $this->SaveAuthors(); unset($this->Translation, $this->WebEnv, $_SESSION['WebEnv'], $this->CurResults, $_SESSION['CurResults']); return true; } /*************************************************************************************************************************************** * Function: PubmedIds ( ) * - Gets a list of PubMed IDs and saves an Entrez session variable for later retrieval of the full items */ function PubmedIds($tidy = false, $mindate = false, $maxdate = false, $escape_empty_results = true) { Global $ncbi_Tool, $ncbi_Email; // Die on error -- This should NEVER occur, and can probably be deleted. if (empty($this->SearchTerm)) die( "Error: missing search term for new search in this->PubmedIds.
" ); // Construct the Query Data $QueryData = array( "db" => "pubmed", "retmode" => "xml", "usehistory" => "y", "term" => $this->SearchTerm, ); if ($mindate and $maxdate) { $QueryData['mindate'] = $mindate; $QueryData['maxdate'] = $maxdate; } elseif (!empty($this->PostData['Searches'][$this->Stage[1]]['Since'])) { $y = date('Y') - $this->PostData['Searches'][$this->Stage[1]]['Since']; $QueryData['mindate'] = $y . "/" . date('m') . "/01"; $QueryData['maxdate'] = date('Y/m/01'); } $this->GetXmlFile( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", $QueryData ); // Get the XML file $this->Array = $this->XmlParser( $this->Xml ); // Parse it unset($this->Xml); // clean up! $this->QueryKey = $this->Array['eSearchResult']['QueryKey']; // Save entrez's "session" variables $this->WebEnv = $this->Array['eSearchResult']['WebEnv']; if ($escape_empty_results and empty($this->Array['eSearchResult']['Count'])) { // Check that pubmed actually has some entries matching the query $this->Html .= "Sorry, PubMed has no items matching your search for " . $QueryData['term'] . ". Check your spelling, or try an older date cutoff.
\n"; $this->OutputHtml(); exit; } $Translation = str_replace(""", "\"", $this->Array['eSearchResult']['QueryTranslation']); // Get the query translation //$this->PostData['Searches'][$this->Stage[1]]['Translation'] = substr($Translation, 1, (strrpos($Translation, ")")-1)); // Cut everything outside the brackets in the translation -- this cuts off the EDAT conditions to make a cache month non-specific $this->PostData['Searches'][$this->Stage[1]]['Translation'] = substr($Translation, 0, -40); // Cut everything outside the brackets in the translation -- this cuts off the EDAT conditions to make a cache month non-specific $this->PostData['Searches'][$this->Stage[1]]['Count'] = $this->Array['eSearchResult']['Count']; // Update the counts (GetCounts() does not support data ranges) if (!empty($tidy)) { // If we're being tidy, remove all the old variables and return true unset($this->Array); return true; } return $this->Array; // Otherwise, return the array } /**************************************************************************************************************************************** * Function PubmedExtractAuthors ( single, reverse, noisy ) * - Takes the search and finds all authors * I'm not entirely sure what the most efficient way of doing this is. I have tried using the XML>Array function, but this is * insanely memory intensive. The simple loop and text-search works, but I'm not convinced that there aren't better ways (a * preg_match, perhaps?) to do it. 
* Input vars: * - Single: gets all results in a single file rather than [$ncbi_RetMax] items at a time (used when updating caches at the start of the month) * - Reverse: subtracts matching authors from the list, instead of increasing their counts * - Noisy: debug mode -- prints the list of changed authors */ function PubmedExtractAuthors($single = false, $reverse = false) { Global $ncbi_CachePath, $ncbi_RetMax; // Check if we have a partial cache if (!$single and ($this->Stage[2] == 1)) { $i = 50001; while (true) { $c = $ncbi_CachePath . "tmp/" . trim($this->MinDate, ".") . "," . md5($this->Translation) . "," . $i; if ((file_exists($c)) and (date('Y', filectime($c)) == date('Y')) and (date('m', filectime($c)) == date('m'))) { $this->Stage[2] = $i; $i += 50000; $_SESSION['Cleanup'][] = $c; continue; } else break; } } // Run the search $this->QueryData = array( "db" => "pubmed", "retmode" => "xml", "usehistory" => "y", "WebEnv" => $this->WebEnv, "query_key" => $this->QueryKey, "retstart" => $this->Stage[2], "retmax" => $ncbi_RetMax ); if ($single) $this->QueryData['retmax'] = $this->PostData['Searches'][$this->Stage[1]]['Count']; // If we're retrieving this as a single file, amend the "retmax" to get all of the results $this->GetXmlFile( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi", $this->QueryData ); // Get the XML file $lines = explode("\n", $this->Xml); // Extract all of the authors from the XML string foreach ($lines as $line) { $line = trim($line); if (substr($line, 0, 34) == "Warning: script ran GetCacheAttribs() on a non-existent cache.
\n"; return 0; // Return 0 for files which do not exist
        }

        // NOTE(review): the statements from here to the closing brace appear to be the tail of
        // GetCacheAttribs(); the start of that method lies above this span, so the code is kept
        // exactly as found (only the line breaks have been restored).
        $x = file_get_contents($f); // Extract from the file
        $x = str_replace("\r\n", "\n", $x); // This is a (temp?) fix for processing *nix generated author lists on windows servers
        $a = explode("[Results]\n", $x);
        $a = explode("\n", $a[0]); // Only the part before the [Results] marker is read here
        foreach ($a as $k => $v) {
            if (empty($v)) continue;
            $v = explode("=", $v); // Each header line is "Name=Value"
            $r[$v[0]] = $v[1];
        }

        /*** TEMP FIX ***/
        // This is for v0.3 cache files which had Expiry attributes not Updated attribs. It can be deleted once the old cache files have been updated
        if (empty($r['Updated'])) $r['Updated'] = filectime($f);

        return $r;
    }

    /***************************************************************************************************************************************
     * Function GetCacheResults
     *  - Extracts the author list from a cache file and returns it as an array
     * Input vars:
     *  - Search:  identifies the search; passed on to CheckCache() and GetCacheName(). Defaults to the active search (Stage[1])
     *  - AtLeast: if set, authors with a count below this value are left out
     *  - AtMost:  if set, authors with a count above this value are left out
     * Returns:
     *  - false if CheckCache() reports that there is no cache file
     *  - otherwise an array of author => count (an empty array if the file holds no results)
     */
    function GetCacheResults($Search = null, $AtLeast = null, $AtMost = null) {
        if (empty($Search)) $Search = $this->Stage[1];          // Fall back to the currently active query
        if (!$this->CheckCache($Search)) return false;          // If the cache doesn't exist, return false

        $contents = file_get_contents($this->GetCacheName($Search));
        $contents = str_replace("\r\n", "\n", $contents);      // Cope with *nix generated author lists on windows servers (and vice versa)

        // Always hand back an array: callers run count() on the result, and an
        // uninitialised variable used to leak out as null when nothing matched.
        $authors = array();

        $sections = explode("[Results]\n", $contents);
        unset($contents);
        if (!isset($sections[1])) return $authors;              // Header only -- there is no results section to read

        foreach (explode("\n", $sections[1]) as $line) {
            if (empty($line)) continue;
            $pair = explode("=", $line);                        // Each result line is "author=count"
            if (!isset($pair[1])) continue;                     // Skip malformed lines which have no count
            if (isset($AtLeast) and ($pair[1] < $AtLeast)) continue;
            if (isset($AtMost) and ($pair[1] > $AtMost)) continue;
            $authors[$pair[0]] = $pair[1];
        }

        return $authors;
    }

    /***************************************************************************************************************************************
     * Function CheckCache
     *  - checks to see if the cache file exists, and if so, if it is current
     * Input vars:
     *  - Search: identifies the search; defaults to the active search (Stage[1])
     * Returns:
     *  - 0 if the cache file does not exist
     *  - 1 if its 'Updated' attribute falls in the current calendar month
     *  - 2 if the file exists but was last updated in an earlier month
     */
    function CheckCache($Search = null) {
        if (empty($Search)) $Search = $this->Stage[1];          // Fall back to the currently active query

        if (!file_exists($this->GetCacheName($Search))) return 0;

        $attribs = $this->GetCacheAttribs($Search);

        // Same year AND same month means the cache is still current
        $same_year  = (date('Y', $attribs['Updated']) == date('Y'));
        $same_month = (date('m', $attribs['Updated']) == date('m'));
        if ($same_year and $same_month) return 1;

        return 2;
    }

    /****************************************************************************************************************************************
     * Function: SaveAuthors
     *  - Writes CurResults to the cache file named by GetCacheName(), then deletes the temporary files listed in $_SESSION['Cleanup']
     *  - I wanted to use an XML layout for the saved authors list, but that considerably raises the amount of memory required (e.g. to get
     *    the array back out of the xml file). Therefore, we have to put up with a cheap and dirty solution
     *  - In v0.4 this had a "TempSave" option, now moved to a dedicated function
     * File layout: "Name=Value" header lines, a blank line, "[Results]", then one "author=count" line per author
     * Returns: true on success; false if the cache file could not be written (in which case the temporary files are kept)
     */
    function SaveAuthors() {
        $results = (isset($this->CurResults) and is_array($this->CurResults)) ? $this->CurResults : array();

        // If there are too many results, we can ignore all who have only one item, for the sake of performance
        // (the original cutoff was based on results for the search - i.e. this->PostData[Searches][n][Count])
        $cutoff = (count($results) > 250000) ? 2 : 1;

        $lines = array();                                       // Must exist even when nobody reaches the cutoff, or implode() fails
        foreach ($results as $author => $count) {
            if ($count >= $cutoff) $lines[] = $author . "=" . $count;
        }

        // Find out when the cache was first made
        if ($this->CheckCache() and ($old = $this->GetCacheAttribs())) {
            // Pre v0.5 cache files do not have the Created attribute, so fall back to Updated.
            // When those cache files have been updated, this fallback can be removed.
            $Created = !empty($old['Created']) ? $old['Created'] : $old['Updated'];
        }
        else $Created = time();

        $text = "SearchTerm=" . $this->SearchTerm
              . "\nMinDate=" . $this->MinDate
              . "\nQueryTranslation=" . $this->Translation
              . "\nCreated=" . $Created
              . "\nUpdated=" . time()
              . "\nCount=" . $this->PostData['Searches'][$this->Stage[1]]['Count']
              . "\nAuCount=" . count($results)                  // Total number of authors found, including any dropped by the cutoff
              . "\n\n[Results]\n" . implode("\n", $lines);

        // Save the cache to the file (this replaces any existing file)
        if (file_put_contents($this->GetCacheName(), $text) === false) return false;
        unset($lines, $text, $old);

        // At this point we can delete the redundant temporary files
        if (!empty($_SESSION['Cleanup'])) {
            foreach ($_SESSION['Cleanup'] as $tmp) {
                if (file_exists($tmp)) unlink($tmp);
            }
        }

        return true;
    }

    /****************************************************************************************************************************************
     * Function: TempSave
     *  - Makes a temporary file of results part way through the search just in-case it gets interrupted
     *  - Moved out from SaveAuthors in v0.5
     *  - The file is named <cache path>tmp/<MinDate>,<md5 of Translation>,<Stage[2]> and holds only a [Results] section
     * Returns: true on success; false if the temporary file could not be written
     */
    function TempSave() {
        Global $ncbi_CachePath;

        $path = $ncbi_CachePath . "tmp/" . trim($this->MinDate, ".") . "," . md5($this->Translation) . "," . $this->Stage[2];

        $lines = array();                                       // Must exist even when there are no results yet
        if (!empty($this->CurResults)) {
            foreach ($this->CurResults as $author => $count) {
                $lines[] = $author . "=" . $count;
            }
        }

        // Stripped down version of SaveAuthors(): no header, no cutoff
        if (file_put_contents($path, "[Results]\n" . implode("\n", $lines)) === false) return false;

        $_SESSION['Cleanup'][] = $path;                         // Add this to the list of temporary files which will need deleting at the end of the search
        return true;
    }

    /****************************************************************************************************************************************
     * Function TempRead
     *  - Extracts saved authors from temporary files and reads them into the CurResults array
     *  - Looks for files numbered 50001, 100001, ... (the naming used by TempSave) and stops at the first one which is
     *    missing, or once the number exceeds the hit count of the active search
     * Returns: true (always)
     */
    function TempRead() {
        Global $ncbi_CachePath;

        $prefix = $ncbi_CachePath . "tmp/" . trim($this->MinDate, ".") . "," . md5($this->Translation) . ",";
        $limit  = $this->PostData['Searches'][$this->Stage[1]]['Count'];

        for ($i = 50001; $i <= $limit; $i += 50000) {
            $path = $prefix . $i;
            if (!file_exists($path)) break;                     // The temporary files are consecutive, so the first gap ends the run

            $contents = file_get_contents($path);
            $contents = str_replace("\r\n", "\n", $contents);   // This is a fix for processing *nix generated author lists on windows servers
            $sections = explode("[Results]\n", $contents);
            unset($contents);
            if (!isset($sections[1])) continue;                 // Nothing usable in this file

            $rows = explode("\n", $sections[1]);
            unset($sections);
            foreach ($rows as $k => $row) {
                $pair = explode("=", $row);                     // Each line is "author=count"
                if (empty($pair[1])) continue;
                if (!empty($this->CurResults[$pair[0]])) $this->CurResults[$pair[0]] += $pair[1];
                else $this->CurResults[$pair[0]] = $pair[1];
                unset($rows[$k], $pair);                        // Memory saving device -- very large datasets can cause the memory to exceed 512M here
            }
        }

        return true;
    }

    /****************************************************************************************************************************************
     * Function: XmlParser
     *  - A generic XML to Array function
     * Input vars:
     *  - input: the XML string to parse; if empty, the object's Xml property is used instead
     * Returns: a nested array keyed by tag name. While building, each element holds '_a' (attributes), '_c' (children)
     *          and/or '_v' (text value); _del_p() then strips the internal '_p' parent links and collapses elements
     *          which hold nothing but a '_v' or a '_c'. Repeated sibling tags become a numerically indexed list.
     */
    function XmlParser($input = false) {
        // Property names are case-sensitive: the rest of the class stores the response in 'Xml', not 'XML'
        if (empty($input)) $input = isset($this->Xml) ? $this->Xml : '';

        // First parse into struct
        $parser = xml_parser_create();
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);     // Keep tag names in their original case
        xml_parse_into_struct($parser, $input, $vals, $index);
        xml_parser_free($parser);
        unset($input, $index);

        // From http://mysrc.blogspot.com/2007/02/php-xml-to-array-and-backwards.html
        $mnary = array();
        $ary = &$mnary;                                         // $ary always points at the container currently being filled
        foreach ($vals as $r) {
            $t = $r['tag'];

            if ($r['type'] == 'open') {
                if (isset($ary[$t])) {
                    // A repeated tag: turn the entry into a list (if it is not one already) and append a new slot
                    if (isset($ary[$t][0])) $ary[$t][] = array();
                    else $ary[$t] = array($ary[$t], array());
                    $cv = &$ary[$t][count($ary[$t]) - 1];
                }
                else $cv = &$ary[$t];

                if (isset($r['attributes'])) {
                    foreach ($r['attributes'] as $k => $v) $cv['_a'][$k] = $v;
                }
                $cv['_c'] = array();
                $cv['_c']['_p'] = &$ary;                        // Remember the parent so that the 'close' entry can step back up
                $ary = &$cv['_c'];                              // Descend into the new element
            }
            elseif ($r['type'] == 'complete') {
                if (isset($ary[$t])) {                          // same as open
                    if (isset($ary[$t][0])) $ary[$t][] = array();
                    else $ary[$t] = array($ary[$t], array());
                    $cv = &$ary[$t][count($ary[$t]) - 1];
                }
                else $cv = &$ary[$t];

                if (isset($r['attributes'])) {
                    foreach ($r['attributes'] as $k => $v) $cv['_a'][$k] = $v;
                }
                $cv['_v'] = (isset($r['value']) ? $r['value'] : '');
            }
            elseif ($r['type'] == 'close') {
                $ary = &$ary['_p'];                             // Step back up to the parent container
            }
        }
        unset($vals);

        $this->_del_p($mnary);
        return $mnary;
    }

    // _Internal: Remove recursion in result array
    //  - Works on $ary in place (it is passed by reference) and returns nothing
    //  - Deletes the '_p' parent links which XmlParser() adds, and descends into every other sub-array
    function _del_p(&$ary) {
        foreach ($ary as $k => &$v) {
            if ($k === "_p") {
                unset($ary[$k]);
                continue;                                       // $v still refers to the parent container here, so it must not be collapsed below
            }

            if (is_array($v)) $this->_del_p($v);

            // Added JAD 24/5/08: Removes the _c/_v/_a when there's only one entry --
            // makes the array easier to handle
            if (is_array($v) and (count($v) == 1)) {
                if (isset($v['_v'])) $v = $v['_v'];
                elseif (isset($v['_c'])) $v = $v['_c'];
            }
        }
        unset($v);                                              // Release the reference left behind by the foreach
    }
}

?>