SearchTerm equal to a specified string
DisplayURL in the nested Ads collection which does not contain a specified string
Position in the nested Ads collection which does not contain a specified string
The query to do this is at the bottom of the page. First a bit of background.
My mongodb instance contained 3 million of these records. When I ran the query the first time, it took around 7-8 seconds to complete. I made the index:
ensureIndex({ SearchTerm:1 }, { "Ads.DisplayURL":1 })
It then took less than 0.1 seconds to run.
Here is the json document I am using:
{
"LocalDay": "2013-11-05T00:00:00",
"PageIndex": 1,
"PageId": 961425400,
"Ads": [
{
"Rating": "",
"DisplayDomain": "",
"Title": "(ACE) Top 25 at home exercises - American Council On Exercise",
"ClickURL": "http:\/\/www.acefitness.org\/acefit\/fitness-programs-article\/2863\/Top-25-At-Home-Exercises\/",
"Rank": 1,
"DisplayURL": "www.test.com",
"Description2": "",
"UrlDomain": "",
"Description1": "",
"Position": "N"
},
{
"Rating": "",
"DisplayDomain": "",
"Title": "The Ultimate Home Workout - Shape",
"ClickURL": "http:\/\/www.shape.com\/fitness\/workouts\/ultimate-home-workout",
"Rank": 3,
"DisplayURL": "www.test.com",
"Description2": "",
"UrlDomain": "",
"Description1": "",
"Position": "N"
}
],
"PagesDeep": 1,
"DBStoreTime": "2013-11-05T09:56:12.627",
"HTML": "",
"LocationId": 2,
"VariantId": 2,
"NumNatural": 10,
"SearchTerm": "exercise at home",
"BrowseTime": 1516,
"ServerId": 2454,
"Blocked": "False",
"Page1TopAds": "",
"EngineId": 6,
"utcstoretime": 1383645873,
"SearchTermId": 1643017
}
The working query:
db.extractedads.find({
"SearchTerm" : "some search"
}, {
"Ads": {
$elemMatch: {
Position: { "$regex" : "^((?!somestring).)*$" },
DisplayURL: { "$regex" : "^((?!somestring).)*$" }
}
}
})
Or in one line so that you can run in the mongodb console:
db.extractedads.find({"SearchTerm" : "somestring" }, {"Ads": { $elemMatch: { Position: { "$regex" : "^((?!somestring).)*$" }, DisplayURL: { "$regex" : "^((?!somestring).)*$" } } } })
No comments:
Post a Comment