.
 *
 *
 *
 *  User defined settings
 *  ~~~~~~~~~~~~~~~~~~~~~
 *  If you wish to run AssEd on your own server, you will need to change the three settings below.
 *  You will also need to make sure that PHP is running with the sessions module installed and the
 *  HttpRequest extension available.
 *  AssEd can use a lot of memory, storage, and bandwidth -- make sure you keep an eye on these.
 *
 */

// How should AssEd identify itself to Entrez when requesting data?
$ncbi_Tool = "AssEd";

// What is your email address?
$ncbi_Email = "you@yoursite.com";

// Folder to store cached files in
$ncbi_CachePath = "/path/to/your/htdocs/cache/";

/*
 *  You should not edit anything other than those three lines, unless you really know what you're doing!
 *
 *
 *
 *  Additional files required
 *  ~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  * The form to enter data into AssEd is available at http://www.cotch.net/assed/index.html
 *
 *  * You will require PHP 5.x with sessions and HttpRequest modules installed
 *
 ********************************************************************************************************/

session_start();

// Check if this is a new search and, if so, delete old session variables
if (isset($_POST['NewSession'])) {
    session_unset();
}

$t = new AssEd(); // Run the object

/********************************************************************************************************
 * Class: AssEd
 *  Runs a set of PubMed searches through the Entrez e-utilities, counts publications per author, and
 *  filters the authors that appear in every search. The work is spread over many page loads: each
 *  load does one step, saves its state in the session, and emits a page that reloads itself.
 *
 *  NOTE(review): the copy of this file that was reviewed had every HTML/XML tag stripped out of its
 *  string literals. The markup emitted below (page template, progress bars, results table, PubMed
 *  links) is a reconstruction around the surviving text -- confirm it against a deployed copy.
 */
class AssEd
{
    var $Stage;             // array   - remembers where we are: [0] = "searches" / "filters" / "complete", [1] = search number, [2] = position within the search
    var $PostData;          // array   - everything in the search form -- see $this->GetPostData()
    var $AuthorList;        // array   - the final list of authors during the filtering stage
    var $Strings;           // integer - number of search strings that were submitted
    var $AllResults;        // array   - per-search author lists, used by the filtering stage
    var $CurResults;        // array   - the results (author => count) of an ongoing search
    var $Connection;        // object  - HttpRequest (PHP extension) for communicating with Entrez
    var $Xml;               // string  - result of an Entrez query in XML format
    var $Array = array();   // array   - result of an Entrez query in array format (use $this->Array = $this->XmlParser())
    var $WebEnv;            // string  - Entrez's "WebEnv" variable, which is basically a session key for the pubmed search - see http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
    var $QueryKey;          // int     - Entrez's "QueryKey" variable, saved alongside WebEnv
    var $QueryData;         // array   - the variables to be sent to Entrez in the next query
    var $SearchTerm;        // string  - currently active search term
    var $MinDate;           // string  - currently active year limit (Note: not a date -- it is the number of years to go back. The name is historical.)
    var $Title;             // string  - the title for the output page
    var $Html;              // string  - the page specific HTML, for use in the OutputHtml function

    /****************************************************************************************************
     * Function: __construct()   (formerly the PHP4-style constructor AssEd())
     *  General switching function and reloader of saved variables between page loads. Restores the
     *  state from the session, then runs whichever stage is current. Every stage ends by emitting a
     *  page and exiting (see OutputHtml), so this never returns to the caller in normal operation.
     */
    function __construct()
    {
        // Create HTTP connection
        $this->Connection = new HttpRequest;

        // Extract variables from the session
        if (isset($_SESSION['Stage']))      $this->Stage      = $_SESSION['Stage'];
        if (isset($_SESSION['PostData']))   $this->PostData   = $_SESSION['PostData'];
        if (isset($_SESSION['WebEnv']))     $this->WebEnv     = $_SESSION['WebEnv'];
        if (isset($_SESSION['QueryKey']))   $this->QueryKey   = $_SESSION['QueryKey'];
        if (isset($_SESSION['AuthorList'])) $this->AuthorList = $_SESSION['AuthorList'];
        if (isset($_SESSION['CurResults'])) $this->CurResults = $_SESSION['CurResults'];

        $this->Html = "";

        // If we are not already within a search, set things up and start the first one rolling
        if (!isset($this->Stage)) {
            $_SESSION['began'] = microtime(true);       // start time, used to report the time elapsed
            $this->GetPostData();                       // extracts all of the form data
            $this->Stage = array("searches", 1, 1);     // note where we are for when the page reloads
        }

        if ($this->Stage[0] == "searches") {
            // Starts a new search or continues the existing one. If the search is incomplete the
            // script exits inside this call as the page reloads, so the code below only runs once
            // the current search has finished.
            $this->PubmedSearch();

            $n = $this->Stage[1] + 1;
            if (!empty($this->PostData['Searches'][$n])) {
                $this->Stage[1] = $n;                   // move on to the next search
                $this->Stage[2] = 1;
            } else {
                $this->Stage[0] = "filters";            // no searches left: begin the filters stage
            }
            $this->SaveReload();
        } elseif ($this->Stage[0] == "filters") {
            $this->RunFilters();
            $this->Stage = array("complete", 0, 0);

            // Delete the redundant XML files. This has to happen before PrintResults(), because
            // PrintResults() ends in exit and nothing placed after it would ever run.
            if (!empty($_SESSION['Cleanup'])) {
                foreach ($_SESSION['Cleanup'] as $f) {
                    if (file_exists($f)) unlink($f);
                }
                unset($_SESSION['Cleanup']);
            }

            $this->PrintResults();
        } elseif ($this->Stage[0] == "complete") {
            // A reordering system: ?order=<column> re-sorts the finished result table
            if (isset($_GET['order'])) $this->ReorderAuthors($_GET['order']);
            $this->PrintResults();
        }
    }

    /****************************************************************************************************
     * Function: ReorderAuthors( order )
     *  Re-sorts $this->AuthorList, highest count first, by the column named in 'order' (a search
     *  number, or one of the limit columns such as 'All'). Requests for a column that does not exist
     *  are ignored, because the value comes straight from the query string.
     */
    function ReorderAuthors($order)
    {
        if (empty($this->AuthorList) or !is_scalar($order)) return;

        // Pivot author => column => count into column => author => count
        $ByColumn = array();
        foreach ($this->AuthorList as $Author => $Items) {
            foreach ($Items as $Term => $Count) {
                $ByColumn[$Term][$Author] = $Count;
            }
        }
        if (!isset($ByColumn[$order])) return;

        arsort($ByColumn[$order]);

        // Rebuild the author list: the sorted column first fixes the author order...
        $Sorted = array();
        foreach ($ByColumn[$order] as $Author => $Count) {
            $Sorted[$Author][$order] = $Count;
        }
        // ...then the remaining columns are filled back in
        foreach ($ByColumn as $Term => $Items) {
            foreach ($Items as $Author => $Count) {
                $Sorted[$Author][$Term] = $Count;
            }
        }
        $this->AuthorList = $Sorted;
    }

    /****************************************************************************************************
     * Function: GetPostData()
     *  Copies the search form from $_POST into $this->PostData: up to eight searches (string,
     *  minimum / maximum publication counts, number of years to search) followed by the seniority
     *  limits and the ignore list. Always returns true.
     */
    function GetPostData()
    {
        for ($i = 1; $i <= 8; $i++) {
            $tSearchString = "SearchString-" . $i;  // -- the search string itself
            $tAtLeast      = "AtLeast-" . $i;       // -- the minimum number of publications
            $tAtMost       = "AtMost-" . $i;        // -- and the maximum
            $tSince        = "Since-" . $i;         // -- the number of years to search (the name is misleading:
                                                    //    originally this was a year, e.g. 'since 1998')

            // The first empty search field marks the end of the list
            if (empty($_POST[$tSearchString])) break;

            $this->PostData['Searches'][$i] = array(
                "SearchString" => trim($_POST[$tSearchString]),
                "AtLeast"      => isset($_POST[$tAtLeast]) ? trim($_POST[$tAtLeast]) : "",
                "AtMost"       => isset($_POST[$tAtMost])  ? trim($_POST[$tAtMost])  : "",
                "Since"        => isset($_POST[$tSince])   ? trim($_POST[$tSince])   : ""
            );
        }
        // Count of searches. Computed after the loop so it is also set when all eight fields are used.
        $this->Strings = $i - 1;

        // Save the seniority limits
        if (!empty($_POST['Active-1yr'])) {
            $this->PostData['Active']      = true;
            $this->PostData['AtLeast-1yr'] = $_POST['AtLeast-1yr'];
            $this->PostData['AtMost-1yr']  = $_POST['AtMost-1yr'];
        } elseif (!empty($_POST['Active-1yr-simple'])) {
            $this->PostData['Active']      = true;
            $this->PostData['AtLeast-1yr'] = 1;
            $this->PostData['AtMost-1yr']  = 1000;
        } else {
            $this->PostData['Active'] = false;
        }

        if (!empty($_POST['Active-5yr'])) {
            $this->PostData['Active-5yr']  = true;
            $this->PostData['AtLeast-5yr'] = $_POST['AtLeast-5yr'];
            $this->PostData['AtMost-5yr']  = $_POST['AtMost-5yr'];
        } elseif (!empty($_POST['Active-5yr-simple'])) {
            $this->PostData['Active-5yr']  = true;
            $this->PostData['AtLeast-5yr'] = 10;
            $this->PostData['AtMost-5yr']  = 1000;
        } else {
            $this->PostData['Active-5yr'] = false;
        }

        if (!empty($_POST['Active-All'])) {
            $this->PostData['Active-All'] = true;
            $this->PostData['AtLeast']    = $_POST['AtLeast'];
            $this->PostData['AtMost']     = $_POST['AtMost'];
        } else {
            $this->PostData['Active-All'] = false;
        }

        if (!empty($_POST['Active-Senior'])) {
            $this->PostData['Active-Senior'] = true;
            $this->PostData['AtLeastSe']     = $_POST['AtLeastSe'];
            $this->PostData['AtMostSe']      = $_POST['AtMostSe'];
        } else {
            $this->PostData['Active-Senior'] = false;
        }

        if (!empty($_POST['Active-Old'])) {
            $this->PostData['Active-Old'] = true;
            $this->PostData['LimOld']     = $_POST['LimOld'];
            $this->PostData['LimYoung']   = $_POST['LimYoung'];
        } else {
            $this->PostData['Active-Old'] = false;
        }

        if (!empty($_POST['Active-Old-Senior'])) {
            $this->PostData['Active-Old-Senior'] = true;
            $this->PostData['SeLimOld']          = $_POST['SeLimOld'];
            $this->PostData['SeLimYoung']        = $_POST['SeLimYoung'];
        } else {
            $this->PostData['Active-Old-Senior'] = false;
        }

        // Set the ignore list
        if (!empty($_POST['Ignore'])) $this->PostData['Ignore'] = $_POST['Ignore'];
        // NOTE(review): the switch is 'IgnoreCoAus' but the stored value is read from 'IgnoreCoAusYear'
        // (it is later sent to Entrez as 'mindate') -- presumably a tick-box plus a year field; confirm
        // against the form.
        if (!empty($_POST['IgnoreCoAus']) and isset($_POST['IgnoreCoAusYear'])) {
            $this->PostData['IgnoreCoAus'] = $_POST['IgnoreCoAusYear'];
        }

        return true;
    }

    /****************************************************************************************************
     * Function: Esc( Text )
     *  Escapes a value for inclusion in HTML text or in a double-quoted attribute.
     */
    function Esc($Text)
    {
        return htmlspecialchars((string) $Text, ENT_QUOTES);
    }

    /****************************************************************************************************
     * Function: ProgressBar( Label, Width )
     *  Returns the HTML for one progress bar. The bars are 600 pixels wide; Width is the filled part.
     *  Label is inserted as-is, so callers escape any user supplied text first.
     *  NOTE(review): reconstructed markup -- the original progress bar HTML was lost.
     */
    function ProgressBar($Label, $Width)
    {
        $Width = max(0, min(600, (int) $Width));
        return "<div style=\"width: 600px; border: 1px solid #999999;\">\n"
             . "<div style=\"width: " . $Width . "px; background-color: #99cc99;\">&nbsp;</div>\n"
             . "</div>\n"
             . "<p>" . $Label . "</p>\n";
    }

    /****************************************************************************************************
     * Function: OutputHtml( Reload )
     *  Wraps $this->Html in the page template, prints it, and ends the script. When Reload is true
     *  the page asks the browser to reload it straight away, which is what drives the next step.
     *  NOTE(review): the page template and the refresh tag are reconstructions -- the originals were
     *  lost; only the heading text and the "time elapsed" line survived.
     */
    function OutputHtml($Reload = false)
    {
        $Title = $this->Title;
        $Html  = $this->Html;
        $Reload = $Reload ? "<meta http-equiv=\"refresh\" content=\"0\" />\n" : "";

        // Calculate the time elapsed (a missing start time reads as zero elapsed)
        $Began = isset($_SESSION['began']) ? $_SESSION['began'] : microtime(true);
        $Time  = round((microtime(true) - $Began), 2);

        echo "<html>\n<head>\n<title>" . $Title . "</title>\n" . $Reload . "</head>\n<body>\n"
           . "<h1>AssEd<sup>beta</sup>: the semi-automatic assistant editor</h1>\n"
           . $Html . "\n"
           . "<p>... time elapsed: " . $Time . " seconds</p>\n"
           . "</body>\n</html>\n";
        exit;
    }

    /****************************************************************************************************
     * Function: SaveReload()
     *  Saves the state into the session, draws the progress bars, and reloads the page to move on to
     *  the next step. Does not return: OutputHtml() ends the script.
     */
    function SaveReload()
    {
        $_SESSION['Stage']    = $this->Stage;
        $_SESSION['PostData'] = $this->PostData;
        $_SESSION['QueryKey'] = $this->QueryKey;
        if (isset($this->WebEnv))     $_SESSION['WebEnv']     = $this->WebEnv;
        if (isset($this->AuthorList)) $_SESSION['AuthorList'] = $this->AuthorList;
        if (isset($this->AllResults)) $_SESSION['AllResults'] = $this->AllResults;
        if (isset($this->CurResults)) $_SESSION['CurResults'] = $this->CurResults;

        $Searches = empty($this->PostData['Searches']) ? array() : $this->PostData['Searches'];

        if ($this->Stage[0] == "searches") {
            foreach ($Searches as $k => $Search) {
                $Label = $this->Esc($Search['SearchString']);
                $Count = isset($Search['Count']) ? $Search['Count'] : 0;

                if ($this->Stage[1] > $k) {
                    // Searches before the current one are finished
                    $this->Html .= $this->ProgressBar($Label . ": 100% (" . $Count . " / " . $Count . ")", 600);
                } elseif (($this->Stage[1] == $k) and !empty($Count)) {
                    $width = ($this->Stage[2] / $Count) * 600;
                    $pc    = round(($width / 6), 2);
                    $this->Html .= $this->ProgressBar($Label . ": " . $pc . "% (" . $this->Stage[2] . " / " . $Count . ")", round($width));
                } else {
                    $this->Html .= $this->ProgressBar($Label . ": 0%", 0);
                }
            }
            $this->Html .= $this->ProgressBar("Post-search filters: 0%", 0);
        } else {
            if (empty($_SESSION['Filtered']) and isset($_SESSION['ToFilter'])) {
                $Remaining = (isset($this->AllResults[1]) and is_array($this->AllResults[1])) ? count($this->AllResults[1]) : 0;
                $_SESSION['Filtered'] = $_SESSION['ToFilter'] - $Remaining;
            }

            foreach ($Searches as $k => $Search) {
                $Count = isset($Search['Count']) ? $Search['Count'] : 0;
                $this->Html .= $this->ProgressBar($this->Esc($Search['SearchString']) . ": 100% (" . $Count . " / " . $Count . ")", 600);
            }

            if (isset($_SESSION['Filtered']) and !empty($_SESSION['ToFilter'])) {
                $width = ($_SESSION['Filtered'] / $_SESSION['ToFilter']) * 600;
                $pc    = round(($width / 6), 2);
                $this->Html .= $this->ProgressBar("Applying limits and filters: " . $pc . "%", round($width));
            } else {
                $this->Html .= $this->ProgressBar("Post-search filters: 0%", 0);
            }
        }

        $this->Title = "AssEd: Processing searches...";
        $this->OutputHtml(true);
    }

    /****************************************************************************************************
     * Function: PubmedLink( Term, Text )
     *  Returns a link that runs Term as a PubMed search, with Text as the link text.
     *  NOTE(review): the original link targets were lost; this URL form is an assumption -- confirm.
     */
    function PubmedLink($Term, $Text)
    {
        return "<a href=\"http://www.ncbi.nlm.nih.gov/pubmed?term=" . urlencode($Term) . "\">" . $this->Esc($Text) . "</a>";
    }

    /****************************************************************************************************
     * Function: PrintResults()
     *  Prints the search results: one row per author, one column per search plus one column per
     *  active limit. Does not return: OutputHtml() ends the script.
     *  NOTE(review): the table markup, the ?order= header links and the PubMed links are
     *  reconstructions -- only the cell text and the $pdat1/$pdat2 date computations survived.
     */
    function PrintResults()
    {
        // Save these two variables into the session to enable re-ordering
        $_SESSION['Stage'] = $this->Stage;
        if (isset($this->AuthorList)) $_SESSION['AuthorList'] = $this->AuthorList;

        $this->Title = "AssEd: Search complete";

        if (empty($this->AuthorList)) {
            $this->Html .= "<p>Sorry, it would appear that nobody has that particularly combination of skills.</p>\n";
        } else {
            $Searches = $this->PostData['Searches'];

            $this->Html .= "<table>\n";
            $this->Html .= "<tr>\n";
            $this->Html .= "<th>Author</th>\n";

            // Print the column headers; each one links to the reordering system
            $i = 0;
            foreach ($Searches as $Search) {
                $i += 1;
                $this->Html .= "<th><a href=\"?order=" . $i . "\">" . $this->Esc($Search['SearchString']) . "</a></th>\n";
            }
            // Check the limits
            if ($this->PostData['Active-All'])    $this->Html .= "<th><a href=\"?order=All\">All pubs</a></th>\n";
            if ($this->PostData['Active-5yr'])    $this->Html .= "<th><a href=\"?order=5yr\">5yr</a></th>\n";
            if ($this->PostData['Active'])        $this->Html .= "<th><a href=\"?order=1yr\">1yr</a></th>\n";
            if ($this->PostData['Active-Senior']) $this->Html .= "<th><a href=\"?order=Senior\">Last Au</a></th>\n";
            $this->Html .= "</tr>\n";

            // Date ranges for the 5yr / 1yr links -- the same for every row, so computed once
            $t = array(date('Y'), date('m'), date('d'));
            $Today = implode("/", $t);
            $t[0] -= 1;
            $OneYearAgo = implode("/", $t);
            $t[0] -= 4;
            $FiveYearsAgo = implode("/", $t);

            // Loop through the Authors
            $j = count($Searches);
            foreach ($this->AuthorList as $Author => $Items) {
                $AuTerm = "\"" . $Author . "\"[Au]";
                $this->Html .= "<tr>\n<td>" . $this->Esc($Author) . "</td>\n";

                // Loop through the search terms
                for ($i = 1; $i <= $j; $i++) {
                    $Count = isset($Items[$i]) ? $Items[$i] : "";
                    $this->Html .= "<td>" . $this->PubmedLink($AuTerm . " AND (" . $Searches[$i]['SearchString'] . ")", $Count) . "</td>\n";
                }

                // Check the limits
                if ($this->PostData['Active-All']) {
                    $this->Html .= "<td>" . $this->PubmedLink($AuTerm, isset($Items['All']) ? $Items['All'] : "") . "</td>\n";
                }
                if ($this->PostData['Active-5yr']) {
                    $pdat1 = $FiveYearsAgo;
                    $pdat2 = $Today;
                    $this->Html .= "<td>" . $this->PubmedLink($AuTerm . " AND (\"" . $pdat1 . "\"[PDAT] : \"" . $pdat2 . "\"[PDAT])", isset($Items['5yr']) ? $Items['5yr'] : "") . "</td>\n";
                }
                if ($this->PostData['Active']) {
                    $pdat1 = $OneYearAgo;
                    $pdat2 = $Today;
                    $this->Html .= "<td>" . $this->PubmedLink($AuTerm . " AND (\"" . $pdat1 . "\"[PDAT] : \"" . $pdat2 . "\"[PDAT])", isset($Items['1yr']) ? $Items['1yr'] : "") . "</td>\n";
                }
                if ($this->PostData['Active-Senior']) {
                    $this->Html .= "<td>" . $this->PubmedLink("\"" . $Author . "\"[lastau]", isset($Items['Senior']) ? $Items['Senior'] : "") . "</td>\n";
                }
                $this->Html .= "</tr>\n";
            }
            $this->Html .= "</table>\n";
        }

        $this->OutputHtml(false);
    }

    /****************************************************************************************************

        NCBI / ENTREZ FUNCTIONS
        ***********************

            1.  GetXmlFile
            2.  -GetCounts-     -- Removed in stripped down version, PubmedIds gives counts
            3.  PubmedIds
            4.  PubmedSummaries
            5.  PubmedSearch
            6.  PubmedAuthors
            7.  FilterSearches

     ****************************************************************************************************/

    /****************************************************************************************************
     * Function: GetXmlFile ( URL, QueryData )
     *  Fetches an Entrez result into $this->Xml, either from the file cache (entries younger than one
     *  week) or through the HttpRequest connection, in which case the response is then cached.
     *  Returns true. If the request throws, a warning is queued and the page is reloaded (the script
     *  exits inside SaveReload).
     */
    function GetXmlFile($Url, $QueryData)
    {
        global $ncbi_Tool, $ncbi_Email, $ncbi_CachePath;

        // Start by making a filename for the cache
        $QueryData2 = $QueryData;
        unset($QueryData2['WebEnv']);           // WebEnv would prevent a cached file being used across sessions
        foreach ($QueryData2 as $Key => $Value) {
            $Value = str_replace("/", "", $Value);  // '/' (used in mindate / maxdate) would create a path
            $Value = trim($Value, ".");             // no surrounding dots in a filename, to be safe
            $QueryData2[$Key] = trim($Value);       // and no accidental whitespace, so equivalent queries share a cache file
        }

        $filename = null;
        if ($this->Stage[0] == "searches") {
            // The search term is user input: strip '/' here as well, exactly as the author_lists
            // cache does, so the name can never point outside xml_files/
            $filename = $ncbi_CachePath . "xml_files/"
                      . str_replace("/", "", trim($this->MinDate, ".")) . ","
                      . str_replace("/", "", trim($this->SearchTerm, ".")) . ","
                      . implode(",", $QueryData2) . ".txt";
            $_SESSION['Cleanup'][] = $filename;     // deleted when the search completes -- never needed again
        } elseif ($this->Stage[0] == "filters") {
            // Because there are so many authors to check, they are split into folders by first letter
            $dir = $ncbi_CachePath . "user_records/" . substr(str_replace("\"", "", $QueryData2['term']), 0, 1) . "/";
            if (!is_dir($dir)) mkdir($dir, 0777, true);
            $filename = $dir . str_replace("[Au]", "", str_replace("\"", "", implode(",", $QueryData2))) . ".txt";
        }

        // Check whether we already have the search cached
        if (($filename !== null) and file_exists($filename)) {
            if (filectime($filename) < (time() - 604800)) {
                unlink($filename);                  // older than one week: delete it and start again
            } else {
                $this->Xml = file_get_contents($filename);
                return true;
            }
        }

        // Add tool and email to QueryData -- this is requested by ncbi to help them deal with any
        // problems that may arise. Set these in the "user settings"
        $QueryData['tool']  = $ncbi_Tool;
        $QueryData['email'] = $ncbi_Email;

        $this->Connection->setUrl($Url);
        $this->Connection->setQueryData($QueryData);
        try {
            $this->Connection->send();
        } catch (Exception $e) {
            // HttpRequest reports connection errors as exceptions -- reload the page and try again
            $this->Html .= "<p>Warning: caught an exception when trying to retreive the XML data from PubMed. I am going to reload the page and hope that the problem goes away. The error given was: " . $this->Esc($e->getMessage()) . "</p>\n";
            $this->SaveReload();
        }
        $this->Xml = $this->Connection->getResponseBody();

        // Cache the new file
        if ($filename !== null) {
            if ((file_put_contents($filename, $this->Xml) === false) or !file_exists($filename)) {
                $this->Html .= "<p>Warning: could not save cache file (" . $this->Esc($filename) . ")</p>";
            }
        }

        return true;
    }

    /****************************************************************************************************
     * Function: PubmedIds ( tidy, mindate, maxdate )
     *  Runs an esearch for $this->SearchTerm, saves Entrez's WebEnv / QueryKey for later retrieval of
     *  the full items, and records the hit count for the current search.
     *  - tidy:              when true, the parsed array is discarded and true is returned
     *  - mindate / maxdate: explicit date range; without one, the search's "Since" value is used
     *  Returns the parsed result array (or true when tidy). Prints a message and exits when PubMed
     *  has no matching items.
     *  NOTE(review): the cache updater in PubmedSearch() calls this with an explicit date range, so a
     *  month with no new items also ends in the "no items" page -- confirm that is intended.
     */
    function PubmedIds($tidy = false, $mindate = false, $maxdate = false)
    {
        // Die on error -- this should never occur
        if (empty($this->SearchTerm)) die("<p>Error: missing search term for new search in this->PubmedIds.</p>");

        // Construct the Query Data
        $QueryData = array(
            "db"         => "pubmed",
            "retmode"    => "xml",
            "usehistory" => "y",
            "term"       => $this->SearchTerm,
        );
        if ($mindate and $maxdate) {
            $QueryData['mindate'] = $mindate;
            $QueryData['maxdate'] = $maxdate;
        } elseif (!empty($this->PostData['Searches'][$this->Stage[1]]['Since'])) {
            $y = date('Y') - $this->PostData['Searches'][$this->Stage[1]]['Since'];
            $QueryData['mindate'] = $y . "/" . date('m') . "/01";
            $QueryData['maxdate'] = date('Y/m/01');
        }

        $this->GetXmlFile("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", $QueryData);
        $this->Array = $this->XmlParser($this->Xml);
        unset($this->Xml);                                  // clean up!

        $Result = isset($this->Array['eSearchResult']) ? $this->Array['eSearchResult'] : array();

        // Save entrez's "session" variables
        $this->QueryKey = isset($Result['QueryKey']) ? $Result['QueryKey'] : null;
        $this->WebEnv   = isset($Result['WebEnv'])   ? $Result['WebEnv']   : null;

        // Check that pubmed actually has some entries matching the query
        if (empty($Result['Count'])) {
            $this->Html .= "<p>Sorry, PubMed has no items matching your search for " . $this->Esc($QueryData['term']) . ". Check your spelling, or try an older date cutoff.</p>\n";
            $this->OutputHtml();
            exit;
        }

        $this->PostData['Searches'][$this->Stage[1]]['Count'] = $Result['Count'];

        if (!empty($tidy)) {
            unset($this->Array);
            return true;
        }
        return $this->Array;
    }

    /****************************************************************************************************
     * Function: PubmedSearch()
     *  Runs (or continues) the current search. If a cached author list exists it is used: as-is when
     *  it was written this calendar month, otherwise after the cache updater has added the items
     *  published since and removed the items that have dropped out of the "Since" window. Without a
     *  cached list, the full search is run and its author list is cached at the end.
     *  Returns true when the search is complete; an incomplete search exits while reloading the page.
     */
    function PubmedSearch()
    {
        global $ncbi_CachePath;

        $Cur = $this->Stage[1];
        $this->SearchTerm = trim($this->PostData['Searches'][$Cur]['SearchString']);
        $this->MinDate    = trim($this->PostData['Searches'][$Cur]['Since']);

        /***/ // Check if we have a cached author list
        $c = $ncbi_CachePath . "author_lists/" . trim($this->MinDate, ".") . "," . str_replace("/", "", trim($this->SearchTerm, ".")) . ".txt";
        if (file_exists($c)) {
            $x = explode("[Results]", file_get_contents($c));

            // The part before [Results] holds "Updated=<timestamp>" and "Count=<n>"
            $u = null;
            foreach (explode("\n", $x[0]) as $v) {
                $v = explode("=", $v);
                if (count($v) < 2) continue;
                if ($v[0] == "Updated") $u = trim($v[1]);
                if ($v[0] == "Count")   $this->PostData['Searches'][$Cur]['Count'] = trim($v[1]);
            }
            if (empty($u)) $u = filectime($c);
            $u = (int) $u;

            // The cache is updated at most once per calendar month
            $updated = array('Y' => date('Y', $u), 'm' => date('m', $u), 'd' => "01");
            $now     = array('Y' => date('Y'),     'm' => date('m'),     'd' => "01");
            if (($updated['Y'] == $now['Y']) and ($updated['m'] == $now['m'])) return true;

            /*** CACHE UPDATER ***/
            // Start from the cached author list only -- not from whatever an earlier search left behind
            $this->CurResults = array();
            if (isset($x[1])) {
                foreach (explode("\n", $x[1]) as $r) {
                    $r = explode("=", $r);
                    if (count($r) < 2) continue;            // blank line after [Results], or a damaged entry
                    $this->CurResults[$r[0]] = trim($r[1]);
                }
            }

            // Get the new items
            $this->PubmedIds(true, implode("/", $updated), implode("/", $now));
            $this->PubmedExtractAuthors(true, false, true);

            // And remove the old ones: the same window, shifted back by the year limit. The arrays are
            // keyed 'Y' / 'm' / 'd', so it is the 'Y' entry that has to move. Without a year limit
            // nothing ever becomes too old, so there is nothing to remove.
            $Years = (int) $this->MinDate;
            if ($Years > 0) {
                $updated['Y'] -= $Years;
                $now['Y']     -= $Years;
                $this->PubmedIds(true, implode("/", $updated), implode("/", $now));
                $this->PubmedExtractAuthors(true, true, true);
            }

            $this->SaveAuthors();
            $this->PostData['Searches'][$Cur]['Count'] = count($this->CurResults);

            // Do not let this search's authors leak into the next search
            unset($this->WebEnv, $_SESSION['WebEnv'], $this->CurResults, $_SESSION['CurResults']);
            return true;
        }

        /***/ // Otherwise, run the search
        // If the Entrez session has not been set up for this search yet, do that now
        if (($this->Stage[2] == 1) or empty($this->WebEnv)) $this->PubmedIds(true);
        if ($this->PubmedExtractAuthors() === false) exit;

        /***/ // We only get this far if the search has completed, so we can now cache the author list
        $this->SaveAuthors();
        unset($this->WebEnv, $_SESSION['WebEnv'], $this->CurResults, $_SESSION['CurResults']);
        return true;
    }

    /****************************************************************************************************
     * Function PubmedExtractAuthors ( single, reverse, noisy )
     *  Fetches the summaries for the current search (1000 at a time) and counts every author in
     *  $this->CurResults. A plain line-by-line text match is used because converting the summaries
     *  with XmlParser() is far too memory intensive.
     *  Input vars:
     *   - single:  gets all results in a single request rather than 1000 items at a time (used when
     *              updating caches at the start of the month)
     *   - reverse: subtracts matching authors from the list, instead of increasing their counts
     *   - noisy:   debug mode -- prints the list of changed authors
     *  Returns true when there is nothing more to fetch; otherwise the page is reloaded (exit).
     */
    function PubmedExtractAuthors($single = false, $reverse = false, $noisy = false)
    {
        $Cur = $this->Stage[1];

        // Run the search
        $this->QueryData = array(
            "db"         => "pubmed",
            "retmode"    => "xml",
            "usehistory" => "y",
            "WebEnv"     => $this->WebEnv,
            "query_key"  => $this->QueryKey,
            "retstart"   => $this->Stage[2],
            "retmax"     => "1000"
        );
        // If we're retrieving this as a single file, amend "retmax" to get all of the results
        if ($single) $this->QueryData['retmax'] = $this->PostData['Searches'][$Cur]['Count'];

        $this->GetXmlFile("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi", $this->QueryData);

        // NOTE(review): 'AllResults' is only written to the session by the filters stage, so this
        // normally does nothing during a search -- kept as it was.
        if (($Cur > 0) and !empty($_SESSION['AllResults'])) $this->CurResults = $_SESSION['AllResults'][$Cur];

        // Extract all of the authors from the XML string.
        // NOTE(review): the tag literal was lost from the reviewed copy; it is restored here from the
        // surviving offsets (a 34 character prefix and a 7 character suffix), which match the eSummary
        // author item and its closing tag.
        $Open    = "<Item Name=\"Author\" Type=\"String\">";
        $OpenLen = strlen($Open);                           // 34
        foreach (explode("\n", $this->Xml) as $line) {
            $line = trim($line);
            if (substr($line, 0, $OpenLen) != $Open) continue;

            $NewAuthor = substr($line, $OpenLen, -7);       // -7 drops the closing </Item>

            if ($reverse) {
                // Reverse mode: reduce the count. An author who is not in the list has nothing to
                // reduce (decrementing anyway would leave a -1 entry behind).
                if (!isset($this->CurResults[$NewAuthor])) continue;
                $this->CurResults[$NewAuthor] -= 1;
                if ($noisy) $this->Html .= "<p>Debug: updated cached results file: reduced count for " . $this->Esc($NewAuthor) . " to " . $this->CurResults[$NewAuthor] . "</p>\n";
                // And clean up if the author no longer has any pubs
                if (empty($this->CurResults[$NewAuthor])) unset($this->CurResults[$NewAuthor]);
            } else {
                if (!empty($this->CurResults[$NewAuthor])) $this->CurResults[$NewAuthor] += 1;
                else $this->CurResults[$NewAuthor] = 1;
                if ($noisy) $this->Html .= "<p>Debug: updated cached results file: increased count for " . $this->Esc($NewAuthor) . " to " . $this->CurResults[$NewAuthor] . "</p>\n";
            }
        }

        if ($single) return true;                           // single-request mode is done after one pass

        $this->Stage[2] += 1000;                            // otherwise, move on to the next 1000 items
        if ($this->Stage[2] < $this->PostData['Searches'][$Cur]['Count']) $this->SaveReload();

        return true;                                        // no more items: we can move on
    }

    /****************************************************************************************************
     * Function: RunFilters()
     *  Filters the author lists: an author is kept when they appear in every search (within that
     *  search's AtLeast / AtMost limits), pass PubmedAuLimits(), and do not match the ignore list.
     *  After roughly 200 Entrez checks the state is saved and the page reloaded. Returns true once
     *  every author of the first search has been handled.
     */
    function RunFilters()
    {
        global $ncbi_CachePath;

        // Get results from the session, or -- on the first pass -- from the cached author lists
        if (!empty($_SESSION['AllResults'])) {
            $this->AllResults = $_SESSION['AllResults'];
        } else {
            foreach ($this->PostData['Searches'] as $i => $Search) {
                $c = $ncbi_CachePath . "author_lists/" . trim($Search['Since'], ".") . "," . str_replace("/", "", trim($Search['SearchString'], ".")) . ".txt";
                if (!file_exists($c)) die("Cache error");

                $x = file_get_contents($c);
                $x = str_replace("\r\n", "\n", $x);     // fix for processing *nix generated author lists on windows servers
                $a = explode("[Results]\n", $x);
                $a = isset($a[1]) ? explode("\n", $a[1]) : array();

                $this->AllResults[$i] = array();        // a search with no usable authors is an empty list
                foreach ($a as $k => $r) {
                    $r = explode("=", $r);
                    if (empty($r[1])) {
                        $this->Html .= "<p>Debug: Invalid entry at line " . $k . "</p>\n";
                        continue;                       // an entry without a count cannot be compared
                    }
                    if (($r[1] < $Search['AtLeast']) or ($r[1] > $Search['AtMost'])) continue;
                    $this->AllResults[$i][$r[0]] = $r[1];
                }
                unset($x, $a, $c);
            }
            arsort($this->AllResults[1]);
        }

        if (empty($_SESSION['ToFilter'])) $_SESSION['ToFilter'] = count($this->AllResults[1]);

        // Filter
        $j = count($this->PostData['Searches']);
        $k = 0;     // Entrez checks made during this page load
        $l = 0;     // authors handled during this page load
        foreach ($this->AllResults[1] as $key => $value) {
            $r = true;

            // Check the author shows up in all lists
            foreach ($this->AllResults as $AuthorList) {
                if (empty($AuthorList[$key])) $r = false;
            }

            // Now check the author has enough pubs. Only an explicit false is a rejection: with no
            // count-based limit active there are simply no statistics to report.
            $AuStats = array();
            if ($r) {
                $AuStats = $this->PubmedAuLimits($key);
                if ($AuStats === false) $r = false;
                $k += 1;
            }

            // Exclude based on the ignore list
            if ($r and isset($this->PostData['Ignore']) and isset($this->PostData['IgnoreCoAus'])) {
                if ($this->PubmedIgnore($key)) $r = false;
                $k += 1;
            }

            if ($r) {
                $this->AuthorList[$key][1] = $value;
                for ($i = 2; $i <= $j; $i++) {
                    $this->AuthorList[$key][$i] = $this->AllResults[$i][$key];
                    unset($this->AllResults[$i][$key]);
                }
                if (isset($AuStats['All']))    $this->AuthorList[$key]['All']    = $AuStats['All'];
                if (isset($AuStats['5yr']))    $this->AuthorList[$key]['5yr']    = $AuStats['5yr'];
                if (isset($AuStats['1yr']))    $this->AuthorList[$key]['1yr']    = $AuStats['1yr'];
                if (isset($AuStats['Senior'])) $this->AuthorList[$key]['Senior'] = $AuStats['Senior'];
            }

            unset($this->AllResults[1][$key]);
            $l += 1;

            // Enough work for one page load: save and reload
            if ($k > 200) {
                if (!empty($_SESSION['Filtered'])) $_SESSION['Filtered'] += $l;
                else $_SESSION['Filtered'] = $l;
                $this->Stage = array("filters", 0, 0);
                $this->SaveReload();
                exit;
            }
        }

        return true;
    }

    /****************************************************************************************************
     * Function: PubmedAuLimits ( Author )
     *  Checks whether the author meets the seniority requirements.
     *  Returns false as soon as one active limit is not met. Otherwise returns an array holding the
     *  counts that were looked up ('All', '5yr', '1yr', 'Senior'); the array is empty when none of
     *  those four limits is active.
     */
    function PubmedAuLimits($Au)
    {
        $Author = "\"" . $Au . "\"";
        $Url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?";
        $QueryData = array(
            "db"         => "pubmed",
            "retmode"    => "xml",
            "rettype"    => "count",
            "usehistory" => "y",
            "term"       => $Author,
        );
        $r = array();

        /* 1: Total number of publications */
        if ($this->PostData['Active-All']) {
            $k = $this->EntrezCount($Url, $QueryData);
            if (($k < $this->PostData['AtLeast']) or ($k > $this->PostData['AtMost'])) return false;
            $r['All'] = $k;
        }

        /* 2: Between A and B pubs in past five years */
        if ($this->PostData['Active-5yr']) {
            $QueryData['reldate'] = "1825";
            $c = $this->EntrezCount($Url, $QueryData);
            if (($c < $this->PostData['AtLeast-5yr']) or ($c > $this->PostData['AtMost-5yr'])) return false;
            $r['5yr'] = $c;
        }

        /* 3: At least X pubs in past year */
        if ($this->PostData['Active']) {
            $QueryData['reldate'] = "365";
            $y = $this->EntrezCount($Url, $QueryData);
            if (($y < $this->PostData['AtLeast-1yr']) or ($y > $this->PostData['AtMost-1yr'])) return false;
            $r['1yr'] = $y;
        }
        unset($QueryData['reldate']);

        /* 4: At least X pubs as senior author */
        $QueryData['term'] .= "[lastau]";       // amend the query term to last author
        if ($this->PostData['Active-Senior']) {
            $s = $this->EntrezCount($Url, $QueryData);
            if (($s < $this->PostData['AtLeastSe']) or ($s > $this->PostData['AtMostSe'])) return false;
            $r['Senior'] = $s;
        }

        /* 5: Years began publishing */
        $QueryData['term'] = $Author;
        if ($this->PostData['Active-Old']) {
            if (!$this->BeganBetween($Url, $QueryData, $this->PostData['LimOld'], $this->PostData['LimYoung'])) return false;
        }

        /* 6: Years began publishing as senior author */
        $QueryData['term'] .= "[lastau]";
        if ($this->PostData['Active-Old-Senior']) {
            if (!$this->BeganBetween($Url, $QueryData, $this->PostData['SeLimOld'], $this->PostData['SeLimYoung'])) return false;
        }

        return $r;
    }

    /****************************************************************************************************
     * Function: EntrezCount ( Url, QueryData )
     *  Runs one Entrez query and returns the eSearchResult Count from the answer (null when the
     *  answer holds no Count). The parsed answer is left in $this->Array.
     */
    function EntrezCount($Url, $QueryData)
    {
        $this->GetXmlFile($Url, $QueryData);
        $this->Array = $this->XmlParser($this->Xml);
        return isset($this->Array['eSearchResult']['Count']) ? $this->Array['eSearchResult']['Count'] : null;
    }

    /****************************************************************************************************
     * Function: BeganBetween ( Url, QueryData, Old, Young )
     *  Returns true when the query has nothing published more than Old years ago, but does have
     *  something published between Old and Young years ago.
     */
    function BeganBetween($Url, $QueryData, $Old, $Young)
    {
        $mindate = implode("/", array((date('Y') - $Old),   date('m'), "01"));
        $maxdate = implode("/", array((date('Y') - $Young), date('m'), "01"));

        // See if there is anything older than the old cutoff
        $QueryData['mindate'] = "1900/01/01";
        $QueryData['maxdate'] = $mindate;
        $Count = $this->EntrezCount($Url, $QueryData);
        if (!empty($Count)) return false;

        // And make sure that there is something older than the young cutoff
        $QueryData['mindate'] = $mindate;
        $QueryData['maxdate'] = $maxdate;
        $Count = $this->EntrezCount($Url, $QueryData);
        return !empty($Count);
    }

    /****************************************************************************************************
     * Function: PubmedIgnore ( Author )
     *  Checks whether the author matches the entries in the ignore list: true when Entrez finds at
     *  least one item for "<author> AND (<ignore list>)" from the 'IgnoreCoAus' year onwards.
     */
    function PubmedIgnore($key)
    {
        $s = $key . " AND (" . $this->PostData['Ignore'] . ")";
        $Url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?";
        $QueryData = array(
            "db"         => "pubmed",
            "retmode"    => "xml",
            "rettype"    => "count",
            "usehistory" => "y",
            "mindate"    => $this->PostData['IgnoreCoAus'],
            "maxdate"    => date('Y'),
            "term"       => $s,
        );
        $Count = $this->EntrezCount($Url, $QueryData);
        return !empty($Count);
    }

    /****************************************************************************************************
     * Function: SaveAuthors
     *  Writes $this->CurResults to the author_lists cache as plain text: "Updated=<timestamp>",
     *  "Count=<number of authors>", a "[Results]" line, then one "author=count" line per author.
     *  (An XML layout would considerably raise the memory needed to read the list back.)
     *  Always returns true.
     */
    function SaveAuthors()
    {
        global $ncbi_CachePath;

        $Results = is_array($this->CurResults) ? $this->CurResults : array();

        // If there are too many results, ignore all who have only one item, for the sake of performance
        $total  = count($Results);
        $cutoff = ($total > 250000) ? 2 : 1;

        $c = $ncbi_CachePath . "author_lists/" . trim($this->MinDate, ".") . "," . str_replace("/", "", trim($this->SearchTerm, ".")) . ".txt";

        $a = array();       // stays empty (and the file has no entries) when no author meets the cutoff
        foreach ($Results as $author => $count) {
            if ($count >= $cutoff) $a[] = $author . "=" . $count;
        }
        $t = "Updated=" . time() . "\nCount=" . $total . "\n[Results]\n" . implode("\n", $a);

        file_put_contents($c, $t);      // replaces any existing file
        unset($a, $t);
        return true;
    }

    /****************************************************************************************************
     * Function: XmlParser ( input )
     *  A generic XML to Array function. Parses 'input' (or $this->Xml when no input is given) and
     *  returns a nested array keyed by tag name; see _del_p() for the final simplification.
     */
    function XmlParser($input = false)
    {
        if (empty($input)) $input = $this->Xml;     // the property is 'Xml' -- property names are case-sensitive

        // First parse into struct
        $vals  = array();
        $index = array();
        $parser = xml_parser_create();
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parse_into_struct($parser, $input, $vals, $index);
        xml_parser_free($parser);
        unset($input, $index);
        if (!is_array($vals)) $vals = array();

        // From http://mysrc.blogspot.com/2007/02/php-xml-to-array-and-backwards.html
        // $ary always refers to the children of the element currently open; '_p' is a temporary
        // reference back to the parent level, followed again when the element closes.
        $mnary = array();
        $ary = &$mnary;
        foreach ($vals as $r) {
            $t = $r['tag'];
            if ($r['type'] == 'open') {
                if (isset($ary[$t])) {
                    // A repeated tag becomes a numerically indexed list
                    if (isset($ary[$t][0])) $ary[$t][] = array();
                    else $ary[$t] = array($ary[$t], array());
                    $cv = &$ary[$t][count($ary[$t]) - 1];
                } else {
                    $cv = &$ary[$t];
                }
                if (isset($r['attributes'])) {
                    foreach ($r['attributes'] as $k => $v) $cv['_a'][$k] = $v;
                }
                $cv['_c'] = array();
                $cv['_c']['_p'] = &$ary;
                $ary = &$cv['_c'];
            } elseif ($r['type'] == 'complete') {
                if (isset($ary[$t])) { // same as open
                    if (isset($ary[$t][0])) $ary[$t][] = array();
                    else $ary[$t] = array($ary[$t], array());
                    $cv = &$ary[$t][count($ary[$t]) - 1];
                } else {
                    $cv = &$ary[$t];
                }
                if (isset($r['attributes'])) {
                    foreach ($r['attributes'] as $k => $v) $cv['_a'][$k] = $v;
                }
                $cv['_v'] = (isset($r['value']) ? $r['value'] : '');
            } elseif ($r['type'] == 'close') {
                $ary = &$ary['_p'];
            }
        }
        unset($vals);

        $this->_del_p($mnary);
        return $mnary;
    }

    /****************************************************************************************************
     * Function: _del_p ( ary )     -- internal
     *  Removes the '_p' parent references that XmlParser() leaves in the result array (they make the
     *  array recursive). Also (added JAD 24/5/08) replaces an element that holds nothing but a '_v'
     *  or a '_c' entry with that entry, which makes the array easier to handle.
     */
    function _del_p(&$ary)
    {
        foreach ($ary as $k => &$v) {
            if ($k === "_p") unset($ary[$k]);
            elseif (is_array($ary[$k])) $this->_del_p($ary[$k]);

            if (is_array($v) and (count($v) == 1) and isset($v['_v'])) $v = $v['_v'];
            elseif (is_array($v) and (count($v) == 1) and isset($v['_c'])) $v = $v['_c'];
        }
        unset($v);      // do not leave the loop reference dangling
    }
}
?>